author    Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-07-13 13:52:27 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-07-13 13:52:27 -0400
commit    e030dbf91a87da7e8be3be3ca781558695bea683 (patch)
tree      4ff2e01621a888be4098ca48c404775e56a55a0d
parent    12a22960549979c10a95cc97f8ec63b461c55692 (diff)
parent    3039f0735a280b54c7364fbfe6a9287f7f0b510a (diff)
Merge branch 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop
* 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop: (28 commits)
  ioatdma: add the unisys "i/oat" pci vendor/device id
  ARM: Add drivers/dma to arch/arm/Kconfig
  iop3xx: surface the iop3xx DMA and AAU units to the iop-adma driver
  iop13xx: surface the iop13xx adma units to the iop-adma driver
  dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines
  md: remove raid5 compute_block and compute_parity5
  md: handle_stripe5 - request io processing in raid5_run_ops
  md: handle_stripe5 - add request/completion logic for async expand ops
  md: handle_stripe5 - add request/completion logic for async read ops
  md: handle_stripe5 - add request/completion logic for async check ops
  md: handle_stripe5 - add request/completion logic for async compute ops
  md: handle_stripe5 - add request/completion logic for async write ops
  md: common infrastructure for running operations with raid5_run_ops
  md: raid5_run_ops - run stripe operations outside sh->lock
  raid5: replace custom debug PRINTKs with standard pr_debug
  raid5: refactor handle_stripe5 and handle_stripe6 (v3)
  async_tx: add the async_tx api
  xor: make 'xor_blocks' a library routine for use with async_tx
  dmaengine: make clients responsible for managing channels
  dmaengine: refactor dmaengine around dma_async_tx_descriptor
  ...
-rw-r--r--  Documentation/networking/ip-sysctl.txt | 6
-rw-r--r--  arch/arm/Kconfig | 2
-rw-r--r--  arch/arm/mach-iop13xx/setup.c | 217
-rw-r--r--  arch/arm/mach-iop32x/glantank.c | 2
-rw-r--r--  arch/arm/mach-iop32x/iq31244.c | 5
-rw-r--r--  arch/arm/mach-iop32x/iq80321.c | 3
-rw-r--r--  arch/arm/mach-iop32x/n2100.c | 2
-rw-r--r--  arch/arm/mach-iop33x/iq80331.c | 3
-rw-r--r--  arch/arm/mach-iop33x/iq80332.c | 3
-rw-r--r--  arch/arm/plat-iop/Makefile | 2
-rw-r--r--  arch/arm/plat-iop/adma.c | 209
-rw-r--r--  crypto/Kconfig | 12
-rw-r--r--  crypto/Makefile | 6
-rw-r--r--  crypto/async_tx/Kconfig | 16
-rw-r--r--  crypto/async_tx/Makefile | 4
-rw-r--r--  crypto/async_tx/async_memcpy.c | 131
-rw-r--r--  crypto/async_tx/async_memset.c | 109
-rw-r--r--  crypto/async_tx/async_tx.c | 497
-rw-r--r--  crypto/async_tx/async_xor.c | 327
-rw-r--r--  crypto/xor.c (renamed from drivers/md/xor.c) | 55
-rw-r--r--  drivers/dma/Kconfig | 12
-rw-r--r--  drivers/dma/Makefile | 1
-rw-r--r--  drivers/dma/dmaengine.c | 419
-rw-r--r--  drivers/dma/ioatdma.c | 369
-rw-r--r--  drivers/dma/ioatdma.h | 16
-rw-r--r--  drivers/dma/ioatdma_io.h | 118
-rw-r--r--  drivers/dma/iop-adma.c | 1467
-rw-r--r--  drivers/md/Kconfig | 2
-rw-r--r--  drivers/md/Makefile | 4
-rw-r--r--  drivers/md/md.c | 2
-rw-r--r--  drivers/md/raid5.c | 2727
-rw-r--r--  include/asm-arm/arch-iop13xx/adma.h | 544
-rw-r--r--  include/asm-arm/arch-iop13xx/iop13xx.h | 38
-rw-r--r--  include/asm-arm/arch-iop32x/adma.h | 5
-rw-r--r--  include/asm-arm/arch-iop33x/adma.h | 5
-rw-r--r--  include/asm-arm/hardware/iop3xx-adma.h | 892
-rw-r--r--  include/asm-arm/hardware/iop3xx.h | 68
-rw-r--r--  include/asm-arm/hardware/iop_adma.h | 118
-rw-r--r--  include/linux/async_tx.h | 156
-rw-r--r--  include/linux/dmaengine.h | 293
-rw-r--r--  include/linux/pci_ids.h | 3
-rw-r--r--  include/linux/raid/raid5.h | 97
-rw-r--r--  include/linux/raid/xor.h | 5
-rw-r--r--  net/core/dev.c | 112
-rw-r--r--  net/ipv4/tcp.c | 26
45 files changed, 7362 insertions, 1748 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 09c184e41cf8..32c2e9da5f3a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -433,6 +433,12 @@ tcp_workaround_signed_windows - BOOLEAN
 	not receive a window scaling option from them.
 	Default: 0
 
+tcp_dma_copybreak - INTEGER
+	Lower limit, in bytes, of the size of socket reads that will be
+	offloaded to a DMA copy engine, if one is present in the system
+	and CONFIG_NET_DMA is enabled.
+	Default: 4096
+
 CIPSOv4 Variables:
 
 cipso_cache_enable - BOOLEAN
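To make the new tunable concrete, here is a minimal sketch of adjusting it from userspace; the /proc path assumes the standard mapping of ipv4 sysctls, and the setting only matters when the kernel was built with CONFIG_NET_DMA and a copy engine is present:

#include <stdio.h>

/* Raise the NET_DMA offload threshold to 8 KiB (default is 4096);
 * equivalent to `sysctl -w net.ipv4.tcp_dma_copybreak=8192`.
 * The path is an assumption based on the usual /proc/sys layout. */
int main(void)
{
        FILE *f = fopen("/proc/sys/net/ipv4/tcp_dma_copybreak", "w");

        if (!f) {
                perror("tcp_dma_copybreak");
                return 1;
        }
        fprintf(f, "8192\n");
        return fclose(f) ? 1 : 0;
}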
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b53e1d4bc486..a44c6da9bf83 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1042,6 +1042,8 @@ source "drivers/mmc/Kconfig"
 
 source "drivers/rtc/Kconfig"
 
+source "drivers/dma/Kconfig"
+
 endmenu
 
 source "fs/Kconfig"
diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c
index bc4871553f6a..bfe0c87e3397 100644
--- a/arch/arm/mach-iop13xx/setup.c
+++ b/arch/arm/mach-iop13xx/setup.c
@@ -25,6 +25,7 @@
25#include <asm/hardware.h> 25#include <asm/hardware.h>
26#include <asm/irq.h> 26#include <asm/irq.h>
27#include <asm/io.h> 27#include <asm/io.h>
28#include <asm/hardware/iop_adma.h>
28 29
29#define IOP13XX_UART_XTAL 33334000 30#define IOP13XX_UART_XTAL 33334000
30#define IOP13XX_SETUP_DEBUG 0 31#define IOP13XX_SETUP_DEBUG 0
@@ -236,19 +237,143 @@ static unsigned long iq8134x_probe_flash_size(void)
236} 237}
237#endif 238#endif
238 239
240/* ADMA Channels */
241static struct resource iop13xx_adma_0_resources[] = {
242 [0] = {
243 .start = IOP13XX_ADMA_PHYS_BASE(0),
244 .end = IOP13XX_ADMA_UPPER_PA(0),
245 .flags = IORESOURCE_MEM,
246 },
247 [1] = {
248 .start = IRQ_IOP13XX_ADMA0_EOT,
249 .end = IRQ_IOP13XX_ADMA0_EOT,
250 .flags = IORESOURCE_IRQ
251 },
252 [2] = {
253 .start = IRQ_IOP13XX_ADMA0_EOC,
254 .end = IRQ_IOP13XX_ADMA0_EOC,
255 .flags = IORESOURCE_IRQ
256 },
257 [3] = {
258 .start = IRQ_IOP13XX_ADMA0_ERR,
259 .end = IRQ_IOP13XX_ADMA0_ERR,
260 .flags = IORESOURCE_IRQ
261 }
262};
263
264static struct resource iop13xx_adma_1_resources[] = {
265 [0] = {
266 .start = IOP13XX_ADMA_PHYS_BASE(1),
267 .end = IOP13XX_ADMA_UPPER_PA(1),
268 .flags = IORESOURCE_MEM,
269 },
270 [1] = {
271 .start = IRQ_IOP13XX_ADMA1_EOT,
272 .end = IRQ_IOP13XX_ADMA1_EOT,
273 .flags = IORESOURCE_IRQ
274 },
275 [2] = {
276 .start = IRQ_IOP13XX_ADMA1_EOC,
277 .end = IRQ_IOP13XX_ADMA1_EOC,
278 .flags = IORESOURCE_IRQ
279 },
280 [3] = {
281 .start = IRQ_IOP13XX_ADMA1_ERR,
282 .end = IRQ_IOP13XX_ADMA1_ERR,
283 .flags = IORESOURCE_IRQ
284 }
285};
286
287static struct resource iop13xx_adma_2_resources[] = {
288 [0] = {
289 .start = IOP13XX_ADMA_PHYS_BASE(2),
290 .end = IOP13XX_ADMA_UPPER_PA(2),
291 .flags = IORESOURCE_MEM,
292 },
293 [1] = {
294 .start = IRQ_IOP13XX_ADMA2_EOT,
295 .end = IRQ_IOP13XX_ADMA2_EOT,
296 .flags = IORESOURCE_IRQ
297 },
298 [2] = {
299 .start = IRQ_IOP13XX_ADMA2_EOC,
300 .end = IRQ_IOP13XX_ADMA2_EOC,
301 .flags = IORESOURCE_IRQ
302 },
303 [3] = {
304 .start = IRQ_IOP13XX_ADMA2_ERR,
305 .end = IRQ_IOP13XX_ADMA2_ERR,
306 .flags = IORESOURCE_IRQ
307 }
308};
309
310static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK;
311static struct iop_adma_platform_data iop13xx_adma_0_data = {
312 .hw_id = 0,
313 .pool_size = PAGE_SIZE,
314};
315
316static struct iop_adma_platform_data iop13xx_adma_1_data = {
317 .hw_id = 1,
318 .pool_size = PAGE_SIZE,
319};
320
321static struct iop_adma_platform_data iop13xx_adma_2_data = {
322 .hw_id = 2,
323 .pool_size = PAGE_SIZE,
324};
325
326/* The ids are fixed up later in iop13xx_platform_init */
327static struct platform_device iop13xx_adma_0_channel = {
328 .name = "iop-adma",
329 .id = 0,
330 .num_resources = 4,
331 .resource = iop13xx_adma_0_resources,
332 .dev = {
333 .dma_mask = &iop13xx_adma_dmamask,
334 .coherent_dma_mask = DMA_64BIT_MASK,
335 .platform_data = (void *) &iop13xx_adma_0_data,
336 },
337};
338
339static struct platform_device iop13xx_adma_1_channel = {
340 .name = "iop-adma",
341 .id = 0,
342 .num_resources = 4,
343 .resource = iop13xx_adma_1_resources,
344 .dev = {
345 .dma_mask = &iop13xx_adma_dmamask,
346 .coherent_dma_mask = DMA_64BIT_MASK,
347 .platform_data = (void *) &iop13xx_adma_1_data,
348 },
349};
350
351static struct platform_device iop13xx_adma_2_channel = {
352 .name = "iop-adma",
353 .id = 0,
354 .num_resources = 4,
355 .resource = iop13xx_adma_2_resources,
356 .dev = {
357 .dma_mask = &iop13xx_adma_dmamask,
358 .coherent_dma_mask = DMA_64BIT_MASK,
359 .platform_data = (void *) &iop13xx_adma_2_data,
360 },
361};
362
239void __init iop13xx_map_io(void) 363void __init iop13xx_map_io(void)
240{ 364{
241 /* Initialize the Static Page Table maps */ 365 /* Initialize the Static Page Table maps */
242 iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc)); 366 iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc));
243} 367}
244 368
245static int init_uart = 0; 369static int init_uart;
246static int init_i2c = 0; 370static int init_i2c;
371static int init_adma;
247 372
248void __init iop13xx_platform_init(void) 373void __init iop13xx_platform_init(void)
249{ 374{
250 int i; 375 int i;
251 u32 uart_idx, i2c_idx, plat_idx; 376 u32 uart_idx, i2c_idx, adma_idx, plat_idx;
252 struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES]; 377 struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES];
253 378
254 /* set the bases so we can read the device id */ 379 /* set the bases so we can read the device id */
@@ -294,6 +419,12 @@ void __init iop13xx_platform_init(void)
294 } 419 }
295 } 420 }
296 421
422 if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) {
423 init_adma |= IOP13XX_INIT_ADMA_0;
424 init_adma |= IOP13XX_INIT_ADMA_1;
425 init_adma |= IOP13XX_INIT_ADMA_2;
426 }
427
297 plat_idx = 0; 428 plat_idx = 0;
298 uart_idx = 0; 429 uart_idx = 0;
299 i2c_idx = 0; 430 i2c_idx = 0;
@@ -332,6 +463,56 @@ void __init iop13xx_platform_init(void)
332 } 463 }
333 } 464 }
334 465
466 /* initialize adma channel ids and capabilities */
467 adma_idx = 0;
468 for (i = 0; i < IQ81340_NUM_ADMA; i++) {
469 struct iop_adma_platform_data *plat_data;
470 if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG)
471 printk(KERN_INFO
472 "Adding adma%d to platform device list\n", i);
473 switch (init_adma & (1 << i)) {
474 case IOP13XX_INIT_ADMA_0:
475 iop13xx_adma_0_channel.id = adma_idx++;
476 iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel;
477 plat_data = &iop13xx_adma_0_data;
478 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
479 dma_cap_set(DMA_XOR, plat_data->cap_mask);
480 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
481 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
482 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
483 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
484 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
485 break;
486 case IOP13XX_INIT_ADMA_1:
487 iop13xx_adma_1_channel.id = adma_idx++;
488 iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel;
489 plat_data = &iop13xx_adma_1_data;
490 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
491 dma_cap_set(DMA_XOR, plat_data->cap_mask);
492 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
493 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
494 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
495 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
496 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
497 break;
498 case IOP13XX_INIT_ADMA_2:
499 iop13xx_adma_2_channel.id = adma_idx++;
500 iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel;
501 plat_data = &iop13xx_adma_2_data;
502 dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
503 dma_cap_set(DMA_XOR, plat_data->cap_mask);
504 dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
505 dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
506 dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
507 dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
508 dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
509 dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
510 dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
511 dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
512 break;
513 }
514 }
515
335#ifdef CONFIG_MTD_PHYSMAP 516#ifdef CONFIG_MTD_PHYSMAP
336 iq8134x_flash_resource.end = iq8134x_flash_resource.start + 517 iq8134x_flash_resource.end = iq8134x_flash_resource.start +
337 iq8134x_probe_flash_size() - 1; 518 iq8134x_probe_flash_size() - 1;
@@ -399,5 +580,35 @@ static int __init iop13xx_init_i2c_setup(char *str)
399 return 1; 580 return 1;
400} 581}
401 582
583static int __init iop13xx_init_adma_setup(char *str)
584{
585 if (str) {
586 while (*str != '\0') {
587 switch (*str) {
588 case '0':
589 init_adma |= IOP13XX_INIT_ADMA_0;
590 break;
591 case '1':
592 init_adma |= IOP13XX_INIT_ADMA_1;
593 break;
594 case '2':
595 init_adma |= IOP13XX_INIT_ADMA_2;
596 break;
597 case ',':
598 case '=':
599 break;
600 default:
601 PRINTK("\"iop13xx_init_adma\" malformed"
602 " at character: \'%c\'", *str);
603 *(str + 1) = '\0';
604 init_adma = IOP13XX_INIT_ADMA_DEFAULT;
605 }
606 str++;
607 }
608 }
609 return 1;
610}
611
612__setup("iop13xx_init_adma", iop13xx_init_adma_setup);
402__setup("iop13xx_init_uart", iop13xx_init_uart_setup); 613__setup("iop13xx_init_uart", iop13xx_init_uart_setup);
403__setup("iop13xx_init_i2c", iop13xx_init_i2c_setup); 614__setup("iop13xx_init_i2c", iop13xx_init_i2c_setup);
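Each ADMA unit above is exported as an ordinary "iop-adma" platform device, so the driver added later in this series finds its registers, its three IRQs, and the iop_adma_platform_data through the normal platform helpers; booting with, say, iop13xx_init_adma=0,1 limits initialization to channels 0 and 1 (the parser accepts the digits 0-2 plus ',' and '='). The following is only an illustrative probe sketch built on those generic helpers, not the actual iop-adma probe routine:

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/ioport.h>
#include <linux/errno.h>
#include <asm/hardware/iop_adma.h>

/* hypothetical probe: pull out what iop13xx_platform_init() packed in */
static int iop_adma_probe_sketch(struct platform_device *pdev)
{
        struct iop_adma_platform_data *plat = pdev->dev.platform_data;
        struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        int irq_eot = platform_get_irq(pdev, 0);        /* end of transfer */
        int irq_eoc = platform_get_irq(pdev, 1);        /* end of chain */
        int irq_err = platform_get_irq(pdev, 2);        /* error */

        if (!plat || !regs || irq_eot < 0 || irq_eoc < 0 || irq_err < 0)
                return -ENODEV;

        printk(KERN_INFO "iop-adma%d: hw_id %d, regs %08lx-%08lx, pool %lu\n",
               pdev->id, (int) plat->hw_id, (unsigned long) regs->start,
               (unsigned long) regs->end, (unsigned long) plat->pool_size);
        return 0;
}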
diff --git a/arch/arm/mach-iop32x/glantank.c b/arch/arm/mach-iop32x/glantank.c
index 5776fd884115..2b086ab2668c 100644
--- a/arch/arm/mach-iop32x/glantank.c
+++ b/arch/arm/mach-iop32x/glantank.c
@@ -180,6 +180,8 @@ static void __init glantank_init_machine(void)
180 platform_device_register(&iop3xx_i2c1_device); 180 platform_device_register(&iop3xx_i2c1_device);
181 platform_device_register(&glantank_flash_device); 181 platform_device_register(&glantank_flash_device);
182 platform_device_register(&glantank_serial_device); 182 platform_device_register(&glantank_serial_device);
183 platform_device_register(&iop3xx_dma_0_channel);
184 platform_device_register(&iop3xx_dma_1_channel);
183 185
184 pm_power_off = glantank_power_off; 186 pm_power_off = glantank_power_off;
185} 187}
diff --git a/arch/arm/mach-iop32x/iq31244.c b/arch/arm/mach-iop32x/iq31244.c
index d4eefbea1fe6..98cfa1cd6bdb 100644
--- a/arch/arm/mach-iop32x/iq31244.c
+++ b/arch/arm/mach-iop32x/iq31244.c
@@ -298,9 +298,14 @@ static void __init iq31244_init_machine(void)
298 platform_device_register(&iop3xx_i2c1_device); 298 platform_device_register(&iop3xx_i2c1_device);
299 platform_device_register(&iq31244_flash_device); 299 platform_device_register(&iq31244_flash_device);
300 platform_device_register(&iq31244_serial_device); 300 platform_device_register(&iq31244_serial_device);
301 platform_device_register(&iop3xx_dma_0_channel);
302 platform_device_register(&iop3xx_dma_1_channel);
301 303
302 if (is_ep80219()) 304 if (is_ep80219())
303 pm_power_off = ep80219_power_off; 305 pm_power_off = ep80219_power_off;
306
307 if (!is_80219())
308 platform_device_register(&iop3xx_aau_channel);
304} 309}
305 310
306static int __init force_ep80219_setup(char *str) 311static int __init force_ep80219_setup(char *str)
diff --git a/arch/arm/mach-iop32x/iq80321.c b/arch/arm/mach-iop32x/iq80321.c
index 8d9f49164a84..18ad29f213b2 100644
--- a/arch/arm/mach-iop32x/iq80321.c
+++ b/arch/arm/mach-iop32x/iq80321.c
@@ -181,6 +181,9 @@ static void __init iq80321_init_machine(void)
181 platform_device_register(&iop3xx_i2c1_device); 181 platform_device_register(&iop3xx_i2c1_device);
182 platform_device_register(&iq80321_flash_device); 182 platform_device_register(&iq80321_flash_device);
183 platform_device_register(&iq80321_serial_device); 183 platform_device_register(&iq80321_serial_device);
184 platform_device_register(&iop3xx_dma_0_channel);
185 platform_device_register(&iop3xx_dma_1_channel);
186 platform_device_register(&iop3xx_aau_channel);
184} 187}
185 188
186MACHINE_START(IQ80321, "Intel IQ80321") 189MACHINE_START(IQ80321, "Intel IQ80321")
diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c
index d55005d64781..390a97d39e5a 100644
--- a/arch/arm/mach-iop32x/n2100.c
+++ b/arch/arm/mach-iop32x/n2100.c
@@ -245,6 +245,8 @@ static void __init n2100_init_machine(void)
245 platform_device_register(&iop3xx_i2c0_device); 245 platform_device_register(&iop3xx_i2c0_device);
246 platform_device_register(&n2100_flash_device); 246 platform_device_register(&n2100_flash_device);
247 platform_device_register(&n2100_serial_device); 247 platform_device_register(&n2100_serial_device);
248 platform_device_register(&iop3xx_dma_0_channel);
249 platform_device_register(&iop3xx_dma_1_channel);
248 250
249 pm_power_off = n2100_power_off; 251 pm_power_off = n2100_power_off;
250 252
diff --git a/arch/arm/mach-iop33x/iq80331.c b/arch/arm/mach-iop33x/iq80331.c
index 2b063180687a..433188ebff2a 100644
--- a/arch/arm/mach-iop33x/iq80331.c
+++ b/arch/arm/mach-iop33x/iq80331.c
@@ -136,6 +136,9 @@ static void __init iq80331_init_machine(void)
136 platform_device_register(&iop33x_uart0_device); 136 platform_device_register(&iop33x_uart0_device);
137 platform_device_register(&iop33x_uart1_device); 137 platform_device_register(&iop33x_uart1_device);
138 platform_device_register(&iq80331_flash_device); 138 platform_device_register(&iq80331_flash_device);
139 platform_device_register(&iop3xx_dma_0_channel);
140 platform_device_register(&iop3xx_dma_1_channel);
141 platform_device_register(&iop3xx_aau_channel);
139} 142}
140 143
141MACHINE_START(IQ80331, "Intel IQ80331") 144MACHINE_START(IQ80331, "Intel IQ80331")
diff --git a/arch/arm/mach-iop33x/iq80332.c b/arch/arm/mach-iop33x/iq80332.c
index 7889ce3cb08e..416c09564cc6 100644
--- a/arch/arm/mach-iop33x/iq80332.c
+++ b/arch/arm/mach-iop33x/iq80332.c
@@ -136,6 +136,9 @@ static void __init iq80332_init_machine(void)
136 platform_device_register(&iop33x_uart0_device); 136 platform_device_register(&iop33x_uart0_device);
137 platform_device_register(&iop33x_uart1_device); 137 platform_device_register(&iop33x_uart1_device);
138 platform_device_register(&iq80332_flash_device); 138 platform_device_register(&iq80332_flash_device);
139 platform_device_register(&iop3xx_dma_0_channel);
140 platform_device_register(&iop3xx_dma_1_channel);
141 platform_device_register(&iop3xx_aau_channel);
139} 142}
140 143
141MACHINE_START(IQ80332, "Intel IQ80332") 144MACHINE_START(IQ80332, "Intel IQ80332")
diff --git a/arch/arm/plat-iop/Makefile b/arch/arm/plat-iop/Makefile
index 4d2b1da3cd82..36bff0325959 100644
--- a/arch/arm/plat-iop/Makefile
+++ b/arch/arm/plat-iop/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_ARCH_IOP32X) += setup.o
 obj-$(CONFIG_ARCH_IOP32X) += time.o
 obj-$(CONFIG_ARCH_IOP32X) += io.o
 obj-$(CONFIG_ARCH_IOP32X) += cp6.o
+obj-$(CONFIG_ARCH_IOP32X) += adma.o
 
 # IOP33X
 obj-$(CONFIG_ARCH_IOP33X) += gpio.o
@@ -21,6 +22,7 @@ obj-$(CONFIG_ARCH_IOP33X) += setup.o
 obj-$(CONFIG_ARCH_IOP33X) += time.o
 obj-$(CONFIG_ARCH_IOP33X) += io.o
 obj-$(CONFIG_ARCH_IOP33X) += cp6.o
+obj-$(CONFIG_ARCH_IOP33X) += adma.o
 
 # IOP13XX
 obj-$(CONFIG_ARCH_IOP13XX) += cp6.o
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
new file mode 100644
index 000000000000..53c5e9a52eb1
--- /dev/null
+++ b/arch/arm/plat-iop/adma.c
@@ -0,0 +1,209 @@
1/*
2 * platform device definitions for the iop3xx dma/xor engines
3 * Copyright © 2006, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 */
19#include <linux/platform_device.h>
20#include <asm/hardware/iop3xx.h>
21#include <linux/dma-mapping.h>
22#include <asm/arch/adma.h>
23#include <asm/hardware/iop_adma.h>
24
25#ifdef CONFIG_ARCH_IOP32X
26#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT
27#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC
28#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR
29
30#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT
31#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC
32#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR
33
34#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT
35#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC
36#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR
37#endif
38#ifdef CONFIG_ARCH_IOP33X
39#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT
40#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC
41#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR
42
43#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT
44#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC
45#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR
46
47#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT
48#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC
49#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR
50#endif
51/* AAU and DMA Channels */
52static struct resource iop3xx_dma_0_resources[] = {
53 [0] = {
54 .start = IOP3XX_DMA_PHYS_BASE(0),
55 .end = IOP3XX_DMA_UPPER_PA(0),
56 .flags = IORESOURCE_MEM,
57 },
58 [1] = {
59 .start = IRQ_DMA0_EOT,
60 .end = IRQ_DMA0_EOT,
61 .flags = IORESOURCE_IRQ
62 },
63 [2] = {
64 .start = IRQ_DMA0_EOC,
65 .end = IRQ_DMA0_EOC,
66 .flags = IORESOURCE_IRQ
67 },
68 [3] = {
69 .start = IRQ_DMA0_ERR,
70 .end = IRQ_DMA0_ERR,
71 .flags = IORESOURCE_IRQ
72 }
73};
74
75static struct resource iop3xx_dma_1_resources[] = {
76 [0] = {
77 .start = IOP3XX_DMA_PHYS_BASE(1),
78 .end = IOP3XX_DMA_UPPER_PA(1),
79 .flags = IORESOURCE_MEM,
80 },
81 [1] = {
82 .start = IRQ_DMA1_EOT,
83 .end = IRQ_DMA1_EOT,
84 .flags = IORESOURCE_IRQ
85 },
86 [2] = {
87 .start = IRQ_DMA1_EOC,
88 .end = IRQ_DMA1_EOC,
89 .flags = IORESOURCE_IRQ
90 },
91 [3] = {
92 .start = IRQ_DMA1_ERR,
93 .end = IRQ_DMA1_ERR,
94 .flags = IORESOURCE_IRQ
95 }
96};
97
98
99static struct resource iop3xx_aau_resources[] = {
100 [0] = {
101 .start = IOP3XX_AAU_PHYS_BASE,
102 .end = IOP3XX_AAU_UPPER_PA,
103 .flags = IORESOURCE_MEM,
104 },
105 [1] = {
106 .start = IRQ_AA_EOT,
107 .end = IRQ_AA_EOT,
108 .flags = IORESOURCE_IRQ
109 },
110 [2] = {
111 .start = IRQ_AA_EOC,
112 .end = IRQ_AA_EOC,
113 .flags = IORESOURCE_IRQ
114 },
115 [3] = {
116 .start = IRQ_AA_ERR,
117 .end = IRQ_AA_ERR,
118 .flags = IORESOURCE_IRQ
119 }
120};
121
122static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK;
123
124static struct iop_adma_platform_data iop3xx_dma_0_data = {
125 .hw_id = DMA0_ID,
126 .pool_size = PAGE_SIZE,
127};
128
129static struct iop_adma_platform_data iop3xx_dma_1_data = {
130 .hw_id = DMA1_ID,
131 .pool_size = PAGE_SIZE,
132};
133
134static struct iop_adma_platform_data iop3xx_aau_data = {
135 .hw_id = AAU_ID,
136 .pool_size = 3 * PAGE_SIZE,
137};
138
139struct platform_device iop3xx_dma_0_channel = {
140 .name = "iop-adma",
141 .id = 0,
142 .num_resources = 4,
143 .resource = iop3xx_dma_0_resources,
144 .dev = {
145 .dma_mask = &iop3xx_adma_dmamask,
146 .coherent_dma_mask = DMA_64BIT_MASK,
147 .platform_data = (void *) &iop3xx_dma_0_data,
148 },
149};
150
151struct platform_device iop3xx_dma_1_channel = {
152 .name = "iop-adma",
153 .id = 1,
154 .num_resources = 4,
155 .resource = iop3xx_dma_1_resources,
156 .dev = {
157 .dma_mask = &iop3xx_adma_dmamask,
158 .coherent_dma_mask = DMA_64BIT_MASK,
159 .platform_data = (void *) &iop3xx_dma_1_data,
160 },
161};
162
163struct platform_device iop3xx_aau_channel = {
164 .name = "iop-adma",
165 .id = 2,
166 .num_resources = 4,
167 .resource = iop3xx_aau_resources,
168 .dev = {
169 .dma_mask = &iop3xx_adma_dmamask,
170 .coherent_dma_mask = DMA_64BIT_MASK,
171 .platform_data = (void *) &iop3xx_aau_data,
172 },
173};
174
175static int __init iop3xx_adma_cap_init(void)
176{
177 #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
178 dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
179 dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
180 #else
181 dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
182 dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
183 dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
184 #endif
185
186 #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
187 dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
188 dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
189 #else
190 dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
191 dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
192 dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
193 #endif
194
195 #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */
196 dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
197 dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
198 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
199 #else
200 dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
201 dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
202 dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
203 dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
204 #endif
205
206 return 0;
207}
208
209arch_initcall(iop3xx_adma_cap_init);
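The capability masks set up above are the only record of these per-variant differences (no CRC32C on the iop32x DMA units, no zero-sum on the iop32x AAU), so consumers are expected to test them at run time instead of repeating the #ifdefs. A small hypothetical helper, using only dma_has_cap() the way the async_tx core does:

#include <linux/types.h>
#include <linux/dmaengine.h>

/* illustrative only: true on AAU/ADMA units that advertise hardware
 * zero-sum (iop33x, iop13xx), false on the iop32x AAU configured above */
static bool iop_chan_can_zero_sum(struct dma_chan *chan)
{
        return dma_has_cap(DMA_ZERO_SUM, chan->device->cap_mask);
}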
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 4ca0ab3448d9..07090e9f9bcf 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1,7 +1,17 @@
 #
-# Cryptographic API Configuration
+# Generic algorithms support
+#
+config XOR_BLOCKS
+	tristate
+
 #
+# async_tx api: hardware offloaded memory transfer/transform support
+#
+source "crypto/async_tx/Kconfig"
 
+#
+# Cryptographic API Configuration
+#
 menu "Cryptographic options"
 
 config CRYPTO
diff --git a/crypto/Makefile b/crypto/Makefile
index cce46a1c9dc7..0cf17f1ea151 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -50,3 +50,9 @@ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
+
+#
+# generic algorithms and the async_tx api
+#
+obj-$(CONFIG_XOR_BLOCKS) += xor.o
+obj-$(CONFIG_ASYNC_CORE) += async_tx/
diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig
new file mode 100644
index 000000000000..d8fb39145986
--- /dev/null
+++ b/crypto/async_tx/Kconfig
@@ -0,0 +1,16 @@
1config ASYNC_CORE
2 tristate
3
4config ASYNC_MEMCPY
5 tristate
6 select ASYNC_CORE
7
8config ASYNC_XOR
9 tristate
10 select ASYNC_CORE
11 select XOR_BLOCKS
12
13config ASYNC_MEMSET
14 tristate
15 select ASYNC_CORE
16
diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile
new file mode 100644
index 000000000000..27baa7d52fbc
--- /dev/null
+++ b/crypto/async_tx/Makefile
@@ -0,0 +1,4 @@
1obj-$(CONFIG_ASYNC_CORE) += async_tx.o
2obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
3obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
4obj-$(CONFIG_ASYNC_XOR) += async_xor.o
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
new file mode 100644
index 000000000000..a973f4ef897d
--- /dev/null
+++ b/crypto/async_tx/async_memcpy.c
@@ -0,0 +1,131 @@
1/*
2 * copy offload engine support
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/highmem.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/async_tx.h>
31
32/**
33 * async_memcpy - attempt to copy memory with a dma engine.
34 * @dest: destination page
35 * @src: src page
 36 * @dest_offset, @src_offset: offsets in bytes into 'dest' and 'src' at which to start the transaction
37 * @len: length in bytes
38 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
39 * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
40 * @depend_tx: memcpy depends on the result of this transaction
41 * @cb_fn: function to call when the memcpy completes
42 * @cb_param: parameter to pass to the callback routine
43 */
44struct dma_async_tx_descriptor *
45async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
46 unsigned int src_offset, size_t len, enum async_tx_flags flags,
47 struct dma_async_tx_descriptor *depend_tx,
48 dma_async_tx_callback cb_fn, void *cb_param)
49{
50 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
51 struct dma_device *device = chan ? chan->device : NULL;
52 int int_en = cb_fn ? 1 : 0;
53 struct dma_async_tx_descriptor *tx = device ?
54 device->device_prep_dma_memcpy(chan, len,
55 int_en) : NULL;
56
57 if (tx) { /* run the memcpy asynchronously */
58 dma_addr_t addr;
59 enum dma_data_direction dir;
60
61 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
62
63 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
64 DMA_NONE : DMA_FROM_DEVICE;
65
66 addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
67 tx->tx_set_dest(addr, tx, 0);
68
69 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
70 DMA_NONE : DMA_TO_DEVICE;
71
72 addr = dma_map_page(device->dev, src, src_offset, len, dir);
73 tx->tx_set_src(addr, tx, 0);
74
75 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
76 } else { /* run the memcpy synchronously */
77 void *dest_buf, *src_buf;
78 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
79
80 /* wait for any prerequisite operations */
81 if (depend_tx) {
82 /* if ack is already set then we cannot be sure
83 * we are referring to the correct operation
84 */
85 BUG_ON(depend_tx->ack);
86 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
87 panic("%s: DMA_ERROR waiting for depend_tx\n",
88 __FUNCTION__);
89 }
90
91 if (flags & ASYNC_TX_KMAP_DST)
92 dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
93 else
94 dest_buf = page_address(dest) + dest_offset;
95
96 if (flags & ASYNC_TX_KMAP_SRC)
97 src_buf = kmap_atomic(src, KM_USER0) + src_offset;
98 else
99 src_buf = page_address(src) + src_offset;
100
101 memcpy(dest_buf, src_buf, len);
102
103 if (flags & ASYNC_TX_KMAP_DST)
104 kunmap_atomic(dest_buf, KM_USER0);
105
106 if (flags & ASYNC_TX_KMAP_SRC)
107 kunmap_atomic(src_buf, KM_USER0);
108
109 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
110 }
111
112 return tx;
113}
114EXPORT_SYMBOL_GPL(async_memcpy);
115
116static int __init async_memcpy_init(void)
117{
118 return 0;
119}
120
121static void __exit async_memcpy_exit(void)
122{
123 do { } while (0);
124}
125
126module_init(async_memcpy_init);
127module_exit(async_memcpy_exit);
128
129MODULE_AUTHOR("Intel Corporation");
130MODULE_DESCRIPTION("asynchronous memcpy api");
131MODULE_LICENSE("GPL");
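For orientation, a minimal caller of the interface above might look as follows; the page pointers, callback, and context names are invented for illustration, and whether the copy really runs on an engine depends on what async_tx_find_channel() returns at run time (the synchronous fallback returns NULL):

#include <linux/mm.h>
#include <linux/async_tx.h>

static void copy_done(void *ctx)
{
        /* completion callback; in the sync fallback it runs inline */
}

static struct dma_async_tx_descriptor *
copy_page_async(struct page *dst, struct page *src, void *ctx)
{
        struct dma_async_tx_descriptor *tx;

        tx = async_memcpy(dst, src, 0, 0, PAGE_SIZE, ASYNC_TX_ACK,
                          NULL, copy_done, ctx);
        async_tx_issue_pending_all();   /* kick any engine that queued it */
        return tx;      /* NULL when the copy was performed synchronously */
}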
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
new file mode 100644
index 000000000000..66ef6351202e
--- /dev/null
+++ b/crypto/async_tx/async_memset.c
@@ -0,0 +1,109 @@
1/*
2 * memory fill offload engine support
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/interrupt.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/async_tx.h>
31
32/**
33 * async_memset - attempt to fill memory with a dma engine.
34 * @dest: destination page
35 * @val: fill value
36 * @offset: offset in pages to start transaction
37 * @len: length in bytes
38 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
39 * @depend_tx: memset depends on the result of this transaction
40 * @cb_fn: function to call when the memcpy completes
41 * @cb_param: parameter to pass to the callback routine
42 */
43struct dma_async_tx_descriptor *
44async_memset(struct page *dest, int val, unsigned int offset,
45 size_t len, enum async_tx_flags flags,
46 struct dma_async_tx_descriptor *depend_tx,
47 dma_async_tx_callback cb_fn, void *cb_param)
48{
49 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
50 struct dma_device *device = chan ? chan->device : NULL;
51 int int_en = cb_fn ? 1 : 0;
52 struct dma_async_tx_descriptor *tx = device ?
53 device->device_prep_dma_memset(chan, val, len,
54 int_en) : NULL;
55
56 if (tx) { /* run the memset asynchronously */
57 dma_addr_t dma_addr;
58 enum dma_data_direction dir;
59
60 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
61 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
62 DMA_NONE : DMA_FROM_DEVICE;
63
64 dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
65 tx->tx_set_dest(dma_addr, tx, 0);
66
67 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
68 } else { /* run the memset synchronously */
69 void *dest_buf;
70 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
71
72 dest_buf = (void *) (((char *) page_address(dest)) + offset);
73
74 /* wait for any prerequisite operations */
75 if (depend_tx) {
76 /* if ack is already set then we cannot be sure
77 * we are referring to the correct operation
78 */
79 BUG_ON(depend_tx->ack);
80 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
81 panic("%s: DMA_ERROR waiting for depend_tx\n",
82 __FUNCTION__);
83 }
84
85 memset(dest_buf, val, len);
86
87 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
88 }
89
90 return tx;
91}
92EXPORT_SYMBOL_GPL(async_memset);
93
94static int __init async_memset_init(void)
95{
96 return 0;
97}
98
99static void __exit async_memset_exit(void)
100{
101 do { } while (0);
102}
103
104module_init(async_memset_init);
105module_exit(async_memset_exit);
106
107MODULE_AUTHOR("Intel Corporation");
108MODULE_DESCRIPTION("asynchronous memset api");
109MODULE_LICENSE("GPL");
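A correspondingly small, hypothetical wrapper around the call just exported, zeroing one page and letting the api reclaim the descriptor immediately:

#include <linux/mm.h>
#include <linux/async_tx.h>

/* returns NULL when no DMA_MEMSET channel exists and memset() ran inline */
static struct dma_async_tx_descriptor *zero_page_async(struct page *page)
{
        return async_memset(page, 0, 0, PAGE_SIZE, ASYNC_TX_ACK,
                            NULL, NULL, NULL);
}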
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
new file mode 100644
index 000000000000..035007145e78
--- /dev/null
+++ b/crypto/async_tx/async_tx.c
@@ -0,0 +1,497 @@
1/*
2 * core routines for the asynchronous memory transfer/transform api
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/async_tx.h>
28
29#ifdef CONFIG_DMA_ENGINE
30static enum dma_state_client
31dma_channel_add_remove(struct dma_client *client,
32 struct dma_chan *chan, enum dma_state state);
33
34static struct dma_client async_tx_dma = {
35 .event_callback = dma_channel_add_remove,
36 /* .cap_mask == 0 defaults to all channels */
37};
38
39/**
40 * dma_cap_mask_all - enable iteration over all operation types
41 */
42static dma_cap_mask_t dma_cap_mask_all;
43
44/**
 45 * chan_ref_percpu - tracks channel allocations per core/operation
46 */
47struct chan_ref_percpu {
48 struct dma_chan_ref *ref;
49};
50
51static int channel_table_initialized;
52static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
53
54/**
55 * async_tx_lock - protect modification of async_tx_master_list and serialize
56 * rebalance operations
57 */
58static spinlock_t async_tx_lock;
59
60static struct list_head
61async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
62
63/* async_tx_issue_pending_all - start all transactions on all channels */
64void async_tx_issue_pending_all(void)
65{
66 struct dma_chan_ref *ref;
67
68 rcu_read_lock();
69 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
70 ref->chan->device->device_issue_pending(ref->chan);
71 rcu_read_unlock();
72}
73EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
74
 75/* dma_wait_for_async_tx - spin wait for a transaction to complete
76 * @tx: transaction to wait on
77 */
78enum dma_status
79dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
80{
81 enum dma_status status;
82 struct dma_async_tx_descriptor *iter;
83
84 if (!tx)
85 return DMA_SUCCESS;
86
87 /* poll through the dependency chain, return when tx is complete */
88 do {
89 iter = tx;
90 while (iter->cookie == -EBUSY)
91 iter = iter->parent;
92
93 status = dma_sync_wait(iter->chan, iter->cookie);
94 } while (status == DMA_IN_PROGRESS || (iter != tx));
95
96 return status;
97}
98EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
99
100/* async_tx_run_dependencies - helper routine for dma drivers to process
101 * (start) dependent operations on their target channel
102 * @tx: transaction with dependencies
103 */
104void
105async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
106{
107 struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
108 struct dma_device *dev;
109 struct dma_chan *chan;
110
111 list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
112 depend_node) {
113 chan = dep_tx->chan;
114 dev = chan->device;
115 /* we can't depend on ourselves */
116 BUG_ON(chan == tx->chan);
117 list_del(&dep_tx->depend_node);
118 tx->tx_submit(dep_tx);
119
120 /* we need to poke the engine as client code does not
121 * know about dependency submission events
122 */
123 dev->device_issue_pending(chan);
124 }
125}
126EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
127
128static void
129free_dma_chan_ref(struct rcu_head *rcu)
130{
131 struct dma_chan_ref *ref;
132 ref = container_of(rcu, struct dma_chan_ref, rcu);
133 kfree(ref);
134}
135
136static void
137init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
138{
139 INIT_LIST_HEAD(&ref->node);
140 INIT_RCU_HEAD(&ref->rcu);
141 ref->chan = chan;
142 atomic_set(&ref->count, 0);
143}
144
145/**
146 * get_chan_ref_by_cap - returns the nth channel of the given capability
147 * defaults to returning the channel with the desired capability and the
148 * lowest reference count if the index can not be satisfied
149 * @cap: capability to match
150 * @index: nth channel desired, passing -1 has the effect of forcing the
151 * default return value
152 */
153static struct dma_chan_ref *
154get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
155{
156 struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
157
158 rcu_read_lock();
159 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
160 if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
161 if (!min_ref)
162 min_ref = ref;
163 else if (atomic_read(&ref->count) <
164 atomic_read(&min_ref->count))
165 min_ref = ref;
166
167 if (index-- == 0) {
168 ret_ref = ref;
169 break;
170 }
171 }
172 rcu_read_unlock();
173
174 if (!ret_ref)
175 ret_ref = min_ref;
176
177 if (ret_ref)
178 atomic_inc(&ret_ref->count);
179
180 return ret_ref;
181}
182
183/**
184 * async_tx_rebalance - redistribute the available channels, optimize
 185 * for cpu isolation in the SMP case, and operation isolation in the
186 * uniprocessor case
187 */
188static void async_tx_rebalance(void)
189{
190 int cpu, cap, cpu_idx = 0;
191 unsigned long flags;
192
193 if (!channel_table_initialized)
194 return;
195
196 spin_lock_irqsave(&async_tx_lock, flags);
197
198 /* undo the last distribution */
199 for_each_dma_cap_mask(cap, dma_cap_mask_all)
200 for_each_possible_cpu(cpu) {
201 struct dma_chan_ref *ref =
202 per_cpu_ptr(channel_table[cap], cpu)->ref;
203 if (ref) {
204 atomic_set(&ref->count, 0);
205 per_cpu_ptr(channel_table[cap], cpu)->ref =
206 NULL;
207 }
208 }
209
210 for_each_dma_cap_mask(cap, dma_cap_mask_all)
211 for_each_online_cpu(cpu) {
212 struct dma_chan_ref *new;
213 if (NR_CPUS > 1)
214 new = get_chan_ref_by_cap(cap, cpu_idx++);
215 else
216 new = get_chan_ref_by_cap(cap, -1);
217
218 per_cpu_ptr(channel_table[cap], cpu)->ref = new;
219 }
220
221 spin_unlock_irqrestore(&async_tx_lock, flags);
222}
223
224static enum dma_state_client
225dma_channel_add_remove(struct dma_client *client,
226 struct dma_chan *chan, enum dma_state state)
227{
228 unsigned long found, flags;
229 struct dma_chan_ref *master_ref, *ref;
230 enum dma_state_client ack = DMA_DUP; /* default: take no action */
231
232 switch (state) {
233 case DMA_RESOURCE_AVAILABLE:
234 found = 0;
235 rcu_read_lock();
236 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
237 if (ref->chan == chan) {
238 found = 1;
239 break;
240 }
241 rcu_read_unlock();
242
243 pr_debug("async_tx: dma resource available [%s]\n",
244 found ? "old" : "new");
245
246 if (!found)
247 ack = DMA_ACK;
248 else
249 break;
250
251 /* add the channel to the generic management list */
252 master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
253 if (master_ref) {
254 /* keep a reference until async_tx is unloaded */
255 dma_chan_get(chan);
256 init_dma_chan_ref(master_ref, chan);
257 spin_lock_irqsave(&async_tx_lock, flags);
258 list_add_tail_rcu(&master_ref->node,
259 &async_tx_master_list);
260 spin_unlock_irqrestore(&async_tx_lock,
261 flags);
262 } else {
263 printk(KERN_WARNING "async_tx: unable to create"
264 " new master entry in response to"
265 " a DMA_RESOURCE_ADDED event"
266 " (-ENOMEM)\n");
267 return 0;
268 }
269
270 async_tx_rebalance();
271 break;
272 case DMA_RESOURCE_REMOVED:
273 found = 0;
274 spin_lock_irqsave(&async_tx_lock, flags);
275 list_for_each_entry_rcu(ref, &async_tx_master_list, node)
276 if (ref->chan == chan) {
277 /* permit backing devices to go away */
278 dma_chan_put(ref->chan);
279 list_del_rcu(&ref->node);
280 call_rcu(&ref->rcu, free_dma_chan_ref);
281 found = 1;
282 break;
283 }
284 spin_unlock_irqrestore(&async_tx_lock, flags);
285
286 pr_debug("async_tx: dma resource removed [%s]\n",
287 found ? "ours" : "not ours");
288
289 if (found)
290 ack = DMA_ACK;
291 else
292 break;
293
294 async_tx_rebalance();
295 break;
296 case DMA_RESOURCE_SUSPEND:
297 case DMA_RESOURCE_RESUME:
298 printk(KERN_WARNING "async_tx: does not support dma channel"
299 " suspend/resume\n");
300 break;
301 default:
302 BUG();
303 }
304
305 return ack;
306}
307
308static int __init
309async_tx_init(void)
310{
311 enum dma_transaction_type cap;
312
313 spin_lock_init(&async_tx_lock);
314 bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
315
316 /* an interrupt will never be an explicit operation type.
317 * clearing this bit prevents allocation to a slot in 'channel_table'
318 */
319 clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
320
321 for_each_dma_cap_mask(cap, dma_cap_mask_all) {
322 channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
323 if (!channel_table[cap])
324 goto err;
325 }
326
327 channel_table_initialized = 1;
328 dma_async_client_register(&async_tx_dma);
329 dma_async_client_chan_request(&async_tx_dma);
330
331 printk(KERN_INFO "async_tx: api initialized (async)\n");
332
333 return 0;
334err:
335 printk(KERN_ERR "async_tx: initialization failure\n");
336
337 while (--cap >= 0)
338 free_percpu(channel_table[cap]);
339
340 return 1;
341}
342
343static void __exit async_tx_exit(void)
344{
345 enum dma_transaction_type cap;
346
347 channel_table_initialized = 0;
348
349 for_each_dma_cap_mask(cap, dma_cap_mask_all)
350 if (channel_table[cap])
351 free_percpu(channel_table[cap]);
352
353 dma_async_client_unregister(&async_tx_dma);
354}
355
356/**
357 * async_tx_find_channel - find a channel to carry out the operation or let
358 * the transaction execute synchronously
359 * @depend_tx: transaction dependency
360 * @tx_type: transaction type
361 */
362struct dma_chan *
363async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
364 enum dma_transaction_type tx_type)
365{
366 /* see if we can keep the chain on one channel */
367 if (depend_tx &&
368 dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
369 return depend_tx->chan;
370 else if (likely(channel_table_initialized)) {
371 struct dma_chan_ref *ref;
372 int cpu = get_cpu();
373 ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
374 put_cpu();
375 return ref ? ref->chan : NULL;
376 } else
377 return NULL;
378}
379EXPORT_SYMBOL_GPL(async_tx_find_channel);
380#else
381static int __init async_tx_init(void)
382{
383 printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
384 return 0;
385}
386
387static void __exit async_tx_exit(void)
388{
389 do { } while (0);
390}
391#endif
392
393void
394async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
395 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
396 dma_async_tx_callback cb_fn, void *cb_param)
397{
398 tx->callback = cb_fn;
399 tx->callback_param = cb_param;
400
401 /* set this new tx to run after depend_tx if:
402 * 1/ a dependency exists (depend_tx is !NULL)
403 * 2/ the tx can not be submitted to the current channel
404 */
405 if (depend_tx && depend_tx->chan != chan) {
406 /* if ack is already set then we cannot be sure
407 * we are referring to the correct operation
408 */
409 BUG_ON(depend_tx->ack);
410
411 tx->parent = depend_tx;
412 spin_lock_bh(&depend_tx->lock);
413 list_add_tail(&tx->depend_node, &depend_tx->depend_list);
414 if (depend_tx->cookie == 0) {
415 struct dma_chan *dep_chan = depend_tx->chan;
416 struct dma_device *dep_dev = dep_chan->device;
417 dep_dev->device_dependency_added(dep_chan);
418 }
419 spin_unlock_bh(&depend_tx->lock);
420
421 /* schedule an interrupt to trigger the channel switch */
422 async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
423 } else {
424 tx->parent = NULL;
425 tx->tx_submit(tx);
426 }
427
428 if (flags & ASYNC_TX_ACK)
429 async_tx_ack(tx);
430
431 if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
432 async_tx_ack(depend_tx);
433}
434EXPORT_SYMBOL_GPL(async_tx_submit);
435
436/**
437 * async_trigger_callback - schedules the callback function to be run after
438 * any dependent operations have been completed.
439 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
440 * @depend_tx: 'callback' requires the completion of this transaction
441 * @cb_fn: function to call after depend_tx completes
442 * @cb_param: parameter to pass to the callback routine
443 */
444struct dma_async_tx_descriptor *
445async_trigger_callback(enum async_tx_flags flags,
446 struct dma_async_tx_descriptor *depend_tx,
447 dma_async_tx_callback cb_fn, void *cb_param)
448{
449 struct dma_chan *chan;
450 struct dma_device *device;
451 struct dma_async_tx_descriptor *tx;
452
453 if (depend_tx) {
454 chan = depend_tx->chan;
455 device = chan->device;
456
457 /* see if we can schedule an interrupt
458 * otherwise poll for completion
459 */
460 if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
461 device = NULL;
462
463 tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
464 } else
465 tx = NULL;
466
467 if (tx) {
468 pr_debug("%s: (async)\n", __FUNCTION__);
469
470 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
471 } else {
472 pr_debug("%s: (sync)\n", __FUNCTION__);
473
474 /* wait for any prerequisite operations */
475 if (depend_tx) {
476 /* if ack is already set then we cannot be sure
477 * we are referring to the correct operation
478 */
479 BUG_ON(depend_tx->ack);
480 if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
481 panic("%s: DMA_ERROR waiting for depend_tx\n",
482 __FUNCTION__);
483 }
484
485 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
486 }
487
488 return tx;
489}
490EXPORT_SYMBOL_GPL(async_trigger_callback);
491
492module_init(async_tx_init);
493module_exit(async_tx_exit);
494
495MODULE_AUTHOR("Intel Corporation");
496MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
497MODULE_LICENSE("GPL");
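The submit path above is what makes operations chainable across engines: passing the descriptor returned by one call as the depend_tx of the next preserves ordering even when async_tx_find_channel() picks different channels, with an interrupt descriptor bridging the switch. A sketch of the idiom, using only functions exported by this file and by async_memcpy.c (buffer names are hypothetical):

#include <linux/mm.h>
#include <linux/async_tx.h>

static void both_copies_done(void *ctx)
{
        /* ordering guaranteed: runs after the two dependent copies */
}

static void copy_two_then_notify(struct page *d0, struct page *s0,
                                 struct page *d1, struct page *s1, void *ctx)
{
        struct dma_async_tx_descriptor *tx;

        /* no ASYNC_TX_ACK here: the descriptor is about to be depended on */
        tx = async_memcpy(d0, s0, 0, 0, PAGE_SIZE, 0, NULL, NULL, NULL);

        /* second copy waits for the first; ASYNC_TX_DEP_ACK releases it */
        tx = async_memcpy(d1, s1, 0, 0, PAGE_SIZE, ASYNC_TX_DEP_ACK,
                          tx, NULL, NULL);

        /* run the callback once everything ahead of it has completed */
        async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
                               both_copies_done, ctx);

        async_tx_issue_pending_all();
}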
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
new file mode 100644
index 000000000000..2575f674dcd5
--- /dev/null
+++ b/crypto/async_tx/async_xor.c
@@ -0,0 +1,327 @@
1/*
2 * xor offload engine api
3 *
4 * Copyright © 2006, Intel Corporation.
5 *
6 * Dan Williams <dan.j.williams@intel.com>
7 *
8 * with architecture considerations by:
9 * Neil Brown <neilb@suse.de>
10 * Jeff Garzik <jeff@garzik.org>
11 *
12 * This program is free software; you can redistribute it and/or modify it
13 * under the terms and conditions of the GNU General Public License,
14 * version 2, as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19 * more details.
20 *
21 * You should have received a copy of the GNU General Public License along with
22 * this program; if not, write to the Free Software Foundation, Inc.,
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 *
25 */
26#include <linux/kernel.h>
27#include <linux/interrupt.h>
28#include <linux/mm.h>
29#include <linux/dma-mapping.h>
30#include <linux/raid/xor.h>
31#include <linux/async_tx.h>
32
33static void
34do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
35 struct dma_chan *chan, struct page *dest, struct page **src_list,
36 unsigned int offset, unsigned int src_cnt, size_t len,
37 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
38 dma_async_tx_callback cb_fn, void *cb_param)
39{
40 dma_addr_t dma_addr;
41 enum dma_data_direction dir;
42 int i;
43
44 pr_debug("%s: len: %zu\n", __FUNCTION__, len);
45
46 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
47 DMA_NONE : DMA_FROM_DEVICE;
48
49 dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
50 tx->tx_set_dest(dma_addr, tx, 0);
51
52 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
53 DMA_NONE : DMA_TO_DEVICE;
54
55 for (i = 0; i < src_cnt; i++) {
56 dma_addr = dma_map_page(device->dev, src_list[i],
57 offset, len, dir);
58 tx->tx_set_src(dma_addr, tx, i);
59 }
60
61 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
62}
63
64static void
65do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
66 unsigned int src_cnt, size_t len, enum async_tx_flags flags,
67 struct dma_async_tx_descriptor *depend_tx,
68 dma_async_tx_callback cb_fn, void *cb_param)
69{
70 void *_dest;
71 int i;
72
73 pr_debug("%s: len: %zu\n", __FUNCTION__, len);
74
75 /* reuse the 'src_list' array to convert to buffer pointers */
76 for (i = 0; i < src_cnt; i++)
77 src_list[i] = (struct page *)
78 (page_address(src_list[i]) + offset);
79
80 /* set destination address */
81 _dest = page_address(dest) + offset;
82
83 if (flags & ASYNC_TX_XOR_ZERO_DST)
84 memset(_dest, 0, len);
85
86 xor_blocks(src_cnt, len, _dest,
87 (void **) src_list);
88
89 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
90}
91
92/**
93 * async_xor - attempt to xor a set of blocks with a dma engine.
94 * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
95 * flag must be set to not include dest data in the calculation. The
 96 * assumption with dma engines is that they only use the destination
 97 * buffer as a source when it is explicitly specified in the source list.
98 * @dest: destination page
99 * @src_list: array of source pages (if the dest is also a source it must be
100 * at index zero). The contents of this array may be overwritten.
101 * @offset: offset in pages to start transaction
102 * @src_cnt: number of source pages
103 * @len: length in bytes
 104 * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST,
105 * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
106 * @depend_tx: xor depends on the result of this transaction.
107 * @cb_fn: function to call when the xor completes
108 * @cb_param: parameter to pass to the callback routine
109 */
110struct dma_async_tx_descriptor *
111async_xor(struct page *dest, struct page **src_list, unsigned int offset,
112 int src_cnt, size_t len, enum async_tx_flags flags,
113 struct dma_async_tx_descriptor *depend_tx,
114 dma_async_tx_callback cb_fn, void *cb_param)
115{
116 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
117 struct dma_device *device = chan ? chan->device : NULL;
118 struct dma_async_tx_descriptor *tx = NULL;
119 dma_async_tx_callback _cb_fn;
120 void *_cb_param;
121 unsigned long local_flags;
122 int xor_src_cnt;
123 int i = 0, src_off = 0, int_en;
124
125 BUG_ON(src_cnt <= 1);
126
127 while (src_cnt) {
128 local_flags = flags;
129 if (device) { /* run the xor asynchronously */
130 xor_src_cnt = min(src_cnt, device->max_xor);
131 /* if we are submitting additional xors
132 * only set the callback on the last transaction
133 */
134 if (src_cnt > xor_src_cnt) {
135 local_flags &= ~ASYNC_TX_ACK;
136 _cb_fn = NULL;
137 _cb_param = NULL;
138 } else {
139 _cb_fn = cb_fn;
140 _cb_param = cb_param;
141 }
142
143 int_en = _cb_fn ? 1 : 0;
144
145 tx = device->device_prep_dma_xor(
146 chan, xor_src_cnt, len, int_en);
147
148 if (tx) {
149 do_async_xor(tx, device, chan, dest,
150 &src_list[src_off], offset, xor_src_cnt, len,
151 local_flags, depend_tx, _cb_fn,
152 _cb_param);
153 } else /* fall through */
154 goto xor_sync;
155 } else { /* run the xor synchronously */
156xor_sync:
157 /* in the sync case the dest is an implied source
158 * (assumes the dest is at the src_off index)
159 */
160 if (flags & ASYNC_TX_XOR_DROP_DST) {
161 src_cnt--;
162 src_off++;
163 }
164
165 /* process up to 'MAX_XOR_BLOCKS' sources */
166 xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
167
168 /* if we are submitting additional xors
169 * only set the callback on the last transaction
170 */
171 if (src_cnt > xor_src_cnt) {
172 local_flags &= ~ASYNC_TX_ACK;
173 _cb_fn = NULL;
174 _cb_param = NULL;
175 } else {
176 _cb_fn = cb_fn;
177 _cb_param = cb_param;
178 }
179
180 /* wait for any prerequisite operations */
181 if (depend_tx) {
182 /* if ack is already set then we cannot be sure
183 * we are referring to the correct operation
184 */
185 BUG_ON(depend_tx->ack);
186 if (dma_wait_for_async_tx(depend_tx) ==
187 DMA_ERROR)
188 panic("%s: DMA_ERROR waiting for "
189 "depend_tx\n",
190 __FUNCTION__);
191 }
192
193 do_sync_xor(dest, &src_list[src_off], offset,
194 xor_src_cnt, len, local_flags, depend_tx,
195 _cb_fn, _cb_param);
196 }
197
198 /* the previous tx is hidden from the client,
199 * so ack it
200 */
201 if (i && depend_tx)
202 async_tx_ack(depend_tx);
203
204 depend_tx = tx;
205
206 if (src_cnt > xor_src_cnt) {
207 /* drop completed sources */
208 src_cnt -= xor_src_cnt;
209 src_off += xor_src_cnt;
210
211 /* unconditionally preserve the destination */
212 flags &= ~ASYNC_TX_XOR_ZERO_DST;
213
214 /* use the intermediate result a source, but remember
215 * it's dropped, because it's implied, in the sync case
216 */
217 src_list[--src_off] = dest;
218 src_cnt++;
219 flags |= ASYNC_TX_XOR_DROP_DST;
220 } else
221 src_cnt = 0;
222 i++;
223 }
224
225 return tx;
226}
227EXPORT_SYMBOL_GPL(async_xor);
228
229static int page_is_zero(struct page *p, unsigned int offset, size_t len)
230{
231 char *a = page_address(p) + offset;
232 return ((*(u32 *) a) == 0 &&
233 memcmp(a, a + 4, len - 4) == 0);
234}
235
236/**
237 * async_xor_zero_sum - attempt a xor parity check with a dma engine.
238 * @dest: destination page used if the xor is performed synchronously
239 * @src_list: array of source pages. The dest page must be listed as a source
240 * at index zero. The contents of this array may be overwritten.
241 * @offset: offset in pages to start transaction
242 * @src_cnt: number of source pages
243 * @len: length in bytes
244 * @result: 0 if sum == 0 else non-zero
245 * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
246 * @depend_tx: xor depends on the result of this transaction.
247 * @cb_fn: function to call when the xor completes
248 * @cb_param: parameter to pass to the callback routine
249 */
250struct dma_async_tx_descriptor *
251async_xor_zero_sum(struct page *dest, struct page **src_list,
252 unsigned int offset, int src_cnt, size_t len,
253 u32 *result, enum async_tx_flags flags,
254 struct dma_async_tx_descriptor *depend_tx,
255 dma_async_tx_callback cb_fn, void *cb_param)
256{
257 struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
258 struct dma_device *device = chan ? chan->device : NULL;
259 int int_en = cb_fn ? 1 : 0;
260 struct dma_async_tx_descriptor *tx = device ?
261 device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
262 int_en) : NULL;
263 int i;
264
265 BUG_ON(src_cnt <= 1);
266
267 if (tx) {
268 dma_addr_t dma_addr;
269 enum dma_data_direction dir;
270
271 pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
272
273 dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
274 DMA_NONE : DMA_TO_DEVICE;
275
276 for (i = 0; i < src_cnt; i++) {
277 dma_addr = dma_map_page(device->dev, src_list[i],
278 offset, len, dir);
279 tx->tx_set_src(dma_addr, tx, i);
280 }
281
282 async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
283 } else {
284 unsigned long xor_flags = flags;
285
286 pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
287
288 xor_flags |= ASYNC_TX_XOR_DROP_DST;
289 xor_flags &= ~ASYNC_TX_ACK;
290
291 tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
292 depend_tx, NULL, NULL);
293
294 if (tx) {
295 if (dma_wait_for_async_tx(tx) == DMA_ERROR)
296 panic("%s: DMA_ERROR waiting for tx\n",
297 __FUNCTION__);
298 async_tx_ack(tx);
299 }
300
301 *result = page_is_zero(dest, offset, len) ? 0 : 1;
302
303 tx = NULL;
304
305 async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
306 }
307
308 return tx;
309}
310EXPORT_SYMBOL_GPL(async_xor_zero_sum);
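
A hedged caller-side sketch of the zero-sum check, following the documented convention that the destination page is also src_list[0]. The name stripe_parity_ok and the synchronous wait are illustrative; a real user, such as the raid5 check path in this series, would normally rely on the completion callback rather than blocking.

#include <linux/async_tx.h>

/* Returns 1 when the src_cnt pages xor to zero, 0 otherwise.  src_list[0]
 * doubles as the fallback destination and may be overwritten when the
 * check runs synchronously.
 */
static int stripe_parity_ok(struct page **src_list, int src_cnt)
{
	struct dma_async_tx_descriptor *tx;
	u32 result = ~0;

	tx = async_xor_zero_sum(src_list[0], src_list, 0, src_cnt,
				PAGE_SIZE, &result, 0,
				NULL, NULL, NULL);
	if (tx) {
		/* offloaded: 'result' is only valid after completion */
		if (dma_wait_for_async_tx(tx) == DMA_ERROR)
			return 0;
		async_tx_ack(tx);
	}

	return result == 0;
}
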
311
312static int __init async_xor_init(void)
313{
314 return 0;
315}
316
317static void __exit async_xor_exit(void)
318{
319 do { } while (0);
320}
321
322module_init(async_xor_init);
323module_exit(async_xor_exit);
324
325MODULE_AUTHOR("Intel Corporation");
326MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
327MODULE_LICENSE("GPL");
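
What ties these helpers together is the returned descriptor: passing it as depend_tx to the next call orders the two operations whether they run on an engine or on the CPU, and ASYNC_TX_DEP_ACK lets the core ack the earlier descriptor once the dependency has been consumed. A sketch chaining two xors using only the interface in this file (the function name and page layout are illustrative):

#include <linux/async_tx.h>

/* xor srcs[0..2] into d1, then xor d1 with srcs[3] into d2, in order */
static void chained_xor(struct page *d1, struct page *d2,
			struct page **srcs,
			dma_async_tx_callback done, void *done_arg)
{
	struct dma_async_tx_descriptor *tx;
	struct page *second[2] = { d1, srcs[3] };

	/* no ASYNC_TX_ACK here: the second operation still depends on it */
	tx = async_xor(d1, srcs, 0, 3, PAGE_SIZE,
		       ASYNC_TX_XOR_ZERO_DST, NULL, NULL, NULL);

	async_xor(d2, second, 0, 2, PAGE_SIZE,
		  ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK,
		  tx, done, done_arg);
}
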
diff --git a/drivers/md/xor.c b/crypto/xor.c
index 324897c4be4e..b2e6db075e49 100644
--- a/drivers/md/xor.c
+++ b/crypto/xor.c
@@ -26,32 +26,32 @@
26static struct xor_block_template *active_template; 26static struct xor_block_template *active_template;
27 27
28void 28void
29xor_block(unsigned int count, unsigned int bytes, void **ptr) 29xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
30{ 30{
31 unsigned long *p0, *p1, *p2, *p3, *p4; 31 unsigned long *p1, *p2, *p3, *p4;
32 32
33 p0 = (unsigned long *) ptr[0]; 33 p1 = (unsigned long *) srcs[0];
34 p1 = (unsigned long *) ptr[1]; 34 if (src_count == 1) {
35 if (count == 2) { 35 active_template->do_2(bytes, dest, p1);
36 active_template->do_2(bytes, p0, p1);
37 return; 36 return;
38 } 37 }
39 38
40 p2 = (unsigned long *) ptr[2]; 39 p2 = (unsigned long *) srcs[1];
41 if (count == 3) { 40 if (src_count == 2) {
42 active_template->do_3(bytes, p0, p1, p2); 41 active_template->do_3(bytes, dest, p1, p2);
43 return; 42 return;
44 } 43 }
45 44
46 p3 = (unsigned long *) ptr[3]; 45 p3 = (unsigned long *) srcs[2];
47 if (count == 4) { 46 if (src_count == 3) {
48 active_template->do_4(bytes, p0, p1, p2, p3); 47 active_template->do_4(bytes, dest, p1, p2, p3);
49 return; 48 return;
50 } 49 }
51 50
52 p4 = (unsigned long *) ptr[4]; 51 p4 = (unsigned long *) srcs[3];
53 active_template->do_5(bytes, p0, p1, p2, p3, p4); 52 active_template->do_5(bytes, dest, p1, p2, p3, p4);
54} 53}
54EXPORT_SYMBOL(xor_blocks);
55 55
56/* Set of all registered templates. */ 56/* Set of all registered templates. */
57static struct xor_block_template *template_list; 57static struct xor_block_template *template_list;
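
The rename above also changes the calling convention, which is easy to miss in the diff: the destination no longer rides along as element 0 of the pointer array, and the count now covers sources only. A before/after sketch (the wrapper name and buffers are illustrative):

#include <linux/raid/xor.h>
#include <linux/mm.h>

static void xor_two_pages_into(struct page *dest,
			       struct page *s1, struct page *s2)
{
	void *srcs[2] = { page_address(s1), page_address(s2) };

	/* old API: void *ptrs[3] = { page_address(dest), srcs[0], srcs[1] };
	 *          xor_block(3, PAGE_SIZE, ptrs);
	 */
	xor_blocks(2, PAGE_SIZE, page_address(dest), srcs);
}
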
@@ -78,7 +78,7 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
78 now = jiffies; 78 now = jiffies;
79 count = 0; 79 count = 0;
80 while (jiffies == now) { 80 while (jiffies == now) {
81 mb(); 81 mb(); /* prevent loop optimization */
82 tmpl->do_2(BENCH_SIZE, b1, b2); 82 tmpl->do_2(BENCH_SIZE, b1, b2);
83 mb(); 83 mb();
84 count++; 84 count++;
@@ -91,26 +91,26 @@ do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
91 speed = max * (HZ * BENCH_SIZE / 1024); 91 speed = max * (HZ * BENCH_SIZE / 1024);
92 tmpl->speed = speed; 92 tmpl->speed = speed;
93 93
94 printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name, 94 printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name,
95 speed / 1000, speed % 1000); 95 speed / 1000, speed % 1000);
96} 96}
97 97
98static int 98static int __init
99calibrate_xor_block(void) 99calibrate_xor_blocks(void)
100{ 100{
101 void *b1, *b2; 101 void *b1, *b2;
102 struct xor_block_template *f, *fastest; 102 struct xor_block_template *f, *fastest;
103 103
104 b1 = (void *) __get_free_pages(GFP_KERNEL, 2); 104 b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
105 if (! b1) { 105 if (!b1) {
106 printk("raid5: Yikes! No memory available.\n"); 106 printk(KERN_WARNING "xor: Yikes! No memory available.\n");
107 return -ENOMEM; 107 return -ENOMEM;
108 } 108 }
109 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 109 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
110 110
111 /* 111 /*
112 * If this arch/cpu has a short-circuited selection, don't loop through all 112 * If this arch/cpu has a short-circuited selection, don't loop through
113 * the possible functions, just test the best one 113 * all the possible functions, just test the best one
114 */ 114 */
115 115
116 fastest = NULL; 116 fastest = NULL;
@@ -122,11 +122,12 @@ calibrate_xor_block(void)
122#define xor_speed(templ) do_xor_speed((templ), b1, b2) 122#define xor_speed(templ) do_xor_speed((templ), b1, b2)
123 123
124 if (fastest) { 124 if (fastest) {
125 printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n", 125 printk(KERN_INFO "xor: automatically using best "
126 "checksumming function: %s\n",
126 fastest->name); 127 fastest->name);
127 xor_speed(fastest); 128 xor_speed(fastest);
128 } else { 129 } else {
129 printk(KERN_INFO "raid5: measuring checksumming speed\n"); 130 printk(KERN_INFO "xor: measuring software checksum speed\n");
130 XOR_TRY_TEMPLATES; 131 XOR_TRY_TEMPLATES;
131 fastest = template_list; 132 fastest = template_list;
132 for (f = fastest; f; f = f->next) 133 for (f = fastest; f; f = f->next)
@@ -134,7 +135,7 @@ calibrate_xor_block(void)
134 fastest = f; 135 fastest = f;
135 } 136 }
136 137
137 printk("raid5: using function: %s (%d.%03d MB/sec)\n", 138 printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
138 fastest->name, fastest->speed / 1000, fastest->speed % 1000); 139 fastest->name, fastest->speed / 1000, fastest->speed % 1000);
139 140
140#undef xor_speed 141#undef xor_speed
@@ -147,8 +148,8 @@ calibrate_xor_block(void)
147 148
148static __exit void xor_exit(void) { } 149static __exit void xor_exit(void) { }
149 150
150EXPORT_SYMBOL(xor_block);
151MODULE_LICENSE("GPL"); 151MODULE_LICENSE("GPL");
152 152
153module_init(calibrate_xor_block); 153/* when built-in xor.o must initialize before drivers/md/md.o */
154core_initcall(calibrate_xor_blocks);
154module_exit(xor_exit); 155module_exit(xor_exit);
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 72be6c63edfc..b31756d59978 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -8,8 +8,8 @@ menu "DMA Engine support"
8config DMA_ENGINE 8config DMA_ENGINE
9 bool "Support for DMA engines" 9 bool "Support for DMA engines"
10 ---help--- 10 ---help---
11 DMA engines offload copy operations from the CPU to dedicated 11 DMA engines offload bulk memory operations from the CPU to dedicated
12 hardware, allowing the copies to happen asynchronously. 12 hardware, allowing the operations to happen asynchronously.
13 13
14comment "DMA Clients" 14comment "DMA Clients"
15 15
@@ -32,4 +32,12 @@ config INTEL_IOATDMA
32 ---help--- 32 ---help---
33 Enable support for the Intel(R) I/OAT DMA engine. 33 Enable support for the Intel(R) I/OAT DMA engine.
34 34
35config INTEL_IOP_ADMA
36 tristate "Intel IOP ADMA support"
37 depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
38 select ASYNC_CORE
39 default m
40 ---help---
41 Enable support for the Intel(R) IOP Series RAID engines.
42
35endmenu 43endmenu
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index bdcfdbdb1aec..b3839b687ae0 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,3 +1,4 @@
1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o 1obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
2obj-$(CONFIG_NET_DMA) += iovlock.o 2obj-$(CONFIG_NET_DMA) += iovlock.o
3obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 3obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
4obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 322ee2984e3d..82489923af09 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -37,11 +37,11 @@
37 * Each device has a channels list, which runs unlocked but is never modified 37 * Each device has a channels list, which runs unlocked but is never modified
38 * once the device is registered, it's just setup by the driver. 38 * once the device is registered, it's just setup by the driver.
39 * 39 *
40 * Each client has a channels list, it's only modified under the client->lock 40 * Each client is responsible for keeping track of the channels it uses. See
41 * and in an RCU callback, so it's safe to read under rcu_read_lock(). 41 * the definition of dma_event_callback in dmaengine.h.
42 * 42 *
43 * Each device has a kref, which is initialized to 1 when the device is 43 * Each device has a kref, which is initialized to 1 when the device is
44 * registered. A kref_put is done for each class_device registered. When the 44 * registered. A kref_get is done for each class_device registered. When the
45 * class_device is released, the coresponding kref_put is done in the release 45 * class_device is released, the coresponding kref_put is done in the release
46 * method. Every time one of the device's channels is allocated to a client, 46 * method. Every time one of the device's channels is allocated to a client,
47 * a kref_get occurs. When the channel is freed, the coresponding kref_put 47 * a kref_get occurs. When the channel is freed, the coresponding kref_put
@@ -51,14 +51,17 @@
51 * references to finish. 51 * references to finish.
52 * 52 *
53 * Each channel has an open-coded implementation of Rusty Russell's "bigref," 53 * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
54 * with a kref and a per_cpu local_t. A single reference is set when on an 54 * with a kref and a per_cpu local_t. A dma_chan_get is called when a client
55 * ADDED event, and removed with a REMOVE event. Net DMA client takes an 55 * signals that it wants to use a channel, and dma_chan_put is called when
56 * extra reference per outstanding transaction. The relase function does a 56 * a channel is removed or a client using it is unregistered. A client can
57 * kref_put on the device. -ChrisL 57 * take extra references per outstanding transaction, as is the case with
58 * the NET DMA client. The release function does a kref_put on the device.
59 * -ChrisL, DanW
58 */ 60 */
59 61
60#include <linux/init.h> 62#include <linux/init.h>
61#include <linux/module.h> 63#include <linux/module.h>
64#include <linux/mm.h>
62#include <linux/device.h> 65#include <linux/device.h>
63#include <linux/dmaengine.h> 66#include <linux/dmaengine.h>
64#include <linux/hardirq.h> 67#include <linux/hardirq.h>
@@ -66,6 +69,7 @@
66#include <linux/percpu.h> 69#include <linux/percpu.h>
67#include <linux/rcupdate.h> 70#include <linux/rcupdate.h>
68#include <linux/mutex.h> 71#include <linux/mutex.h>
72#include <linux/jiffies.h>
69 73
70static DEFINE_MUTEX(dma_list_mutex); 74static DEFINE_MUTEX(dma_list_mutex);
71static LIST_HEAD(dma_device_list); 75static LIST_HEAD(dma_device_list);
@@ -100,8 +104,19 @@ static ssize_t show_bytes_transferred(struct class_device *cd, char *buf)
100static ssize_t show_in_use(struct class_device *cd, char *buf) 104static ssize_t show_in_use(struct class_device *cd, char *buf)
101{ 105{
102 struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev); 106 struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
107 int in_use = 0;
108
109 if (unlikely(chan->slow_ref) &&
110 atomic_read(&chan->refcount.refcount) > 1)
111 in_use = 1;
112 else {
113 if (local_read(&(per_cpu_ptr(chan->local,
114 get_cpu())->refcount)) > 0)
115 in_use = 1;
116 put_cpu();
117 }
103 118
104 return sprintf(buf, "%d\n", (chan->client ? 1 : 0)); 119 return sprintf(buf, "%d\n", in_use);
105} 120}
106 121
107static struct class_device_attribute dma_class_attrs[] = { 122static struct class_device_attribute dma_class_attrs[] = {
@@ -127,43 +142,72 @@ static struct class dma_devclass = {
127 142
128/* --- client and device registration --- */ 143/* --- client and device registration --- */
129 144
145#define dma_chan_satisfies_mask(chan, mask) \
146 __dma_chan_satisfies_mask((chan), &(mask))
147static int
148__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
149{
150 dma_cap_mask_t has;
151
152 bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
153 DMA_TX_TYPE_END);
154 return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
155}
156
130/** 157/**
131 * dma_client_chan_alloc - try to allocate a channel to a client 158 * dma_client_chan_alloc - try to allocate channels to a client
132 * @client: &dma_client 159 * @client: &dma_client
133 * 160 *
134 * Called with dma_list_mutex held. 161 * Called with dma_list_mutex held.
135 */ 162 */
136static struct dma_chan *dma_client_chan_alloc(struct dma_client *client) 163static void dma_client_chan_alloc(struct dma_client *client)
137{ 164{
138 struct dma_device *device; 165 struct dma_device *device;
139 struct dma_chan *chan; 166 struct dma_chan *chan;
140 unsigned long flags;
141 int desc; /* allocated descriptor count */ 167 int desc; /* allocated descriptor count */
168 enum dma_state_client ack;
142 169
143 /* Find a channel, any DMA engine will do */ 170 /* Find a channel */
144 list_for_each_entry(device, &dma_device_list, global_node) { 171 list_for_each_entry(device, &dma_device_list, global_node)
145 list_for_each_entry(chan, &device->channels, device_node) { 172 list_for_each_entry(chan, &device->channels, device_node) {
146 if (chan->client) 173 if (!dma_chan_satisfies_mask(chan, client->cap_mask))
147 continue; 174 continue;
148 175
149 desc = chan->device->device_alloc_chan_resources(chan); 176 desc = chan->device->device_alloc_chan_resources(chan);
150 if (desc >= 0) { 177 if (desc >= 0) {
151 kref_get(&device->refcount); 178 ack = client->event_callback(client,
152 kref_init(&chan->refcount); 179 chan,
153 chan->slow_ref = 0; 180 DMA_RESOURCE_AVAILABLE);
154 INIT_RCU_HEAD(&chan->rcu); 181
155 chan->client = client; 182 /* we are done once this client rejects
156 spin_lock_irqsave(&client->lock, flags); 183 * an available resource
157 list_add_tail_rcu(&chan->client_node, 184 */
158 &client->channels); 185 if (ack == DMA_ACK) {
159 spin_unlock_irqrestore(&client->lock, flags); 186 dma_chan_get(chan);
160 return chan; 187 kref_get(&device->refcount);
188 } else if (ack == DMA_NAK)
189 return;
161 } 190 }
162 } 191 }
163 } 192}
193
194enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
195{
196 enum dma_status status;
197 unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
198
199 dma_async_issue_pending(chan);
200 do {
201 status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
202 if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
203 printk(KERN_ERR "dma_sync_wait_timeout!\n");
204 return DMA_ERROR;
205 }
206 } while (status == DMA_IN_PROGRESS);
164 207
165 return NULL; 208 return status;
166} 209}
210EXPORT_SYMBOL(dma_sync_wait);
167 211
168/** 212/**
169 * dma_chan_cleanup - release a DMA channel's resources 213 * dma_chan_cleanup - release a DMA channel's resources
@@ -173,7 +217,6 @@ void dma_chan_cleanup(struct kref *kref)
173{ 217{
174 struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); 218 struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
175 chan->device->device_free_chan_resources(chan); 219 chan->device->device_free_chan_resources(chan);
176 chan->client = NULL;
177 kref_put(&chan->device->refcount, dma_async_device_cleanup); 220 kref_put(&chan->device->refcount, dma_async_device_cleanup);
178} 221}
179EXPORT_SYMBOL(dma_chan_cleanup); 222EXPORT_SYMBOL(dma_chan_cleanup);
@@ -189,7 +232,7 @@ static void dma_chan_free_rcu(struct rcu_head *rcu)
189 kref_put(&chan->refcount, dma_chan_cleanup); 232 kref_put(&chan->refcount, dma_chan_cleanup);
190} 233}
191 234
192static void dma_client_chan_free(struct dma_chan *chan) 235static void dma_chan_release(struct dma_chan *chan)
193{ 236{
194 atomic_add(0x7FFFFFFF, &chan->refcount.refcount); 237 atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
195 chan->slow_ref = 1; 238 chan->slow_ref = 1;
@@ -197,70 +240,57 @@ static void dma_client_chan_free(struct dma_chan *chan)
197} 240}
198 241
199/** 242/**
200 * dma_chans_rebalance - reallocate channels to clients 243 * dma_chans_notify_available - broadcast available channels to the clients
201 *
202 * When the number of DMA channel in the system changes,
203 * channels need to be rebalanced among clients.
204 */ 244 */
205static void dma_chans_rebalance(void) 245static void dma_clients_notify_available(void)
206{ 246{
207 struct dma_client *client; 247 struct dma_client *client;
208 struct dma_chan *chan;
209 unsigned long flags;
210 248
211 mutex_lock(&dma_list_mutex); 249 mutex_lock(&dma_list_mutex);
212 250
213 list_for_each_entry(client, &dma_client_list, global_node) { 251 list_for_each_entry(client, &dma_client_list, global_node)
214 while (client->chans_desired > client->chan_count) { 252 dma_client_chan_alloc(client);
215 chan = dma_client_chan_alloc(client);
216 if (!chan)
217 break;
218 client->chan_count++;
219 client->event_callback(client,
220 chan,
221 DMA_RESOURCE_ADDED);
222 }
223 while (client->chans_desired < client->chan_count) {
224 spin_lock_irqsave(&client->lock, flags);
225 chan = list_entry(client->channels.next,
226 struct dma_chan,
227 client_node);
228 list_del_rcu(&chan->client_node);
229 spin_unlock_irqrestore(&client->lock, flags);
230 client->chan_count--;
231 client->event_callback(client,
232 chan,
233 DMA_RESOURCE_REMOVED);
234 dma_client_chan_free(chan);
235 }
236 }
237 253
238 mutex_unlock(&dma_list_mutex); 254 mutex_unlock(&dma_list_mutex);
239} 255}
240 256
241/** 257/**
242 * dma_async_client_register - allocate and register a &dma_client 258 * dma_clients_notify_removed - tell the clients that a channel is going away
243 * @event_callback: callback for notification of channel addition/removal 259 * @chan: channel on its way out
244 */ 260 */
245struct dma_client *dma_async_client_register(dma_event_callback event_callback) 261static void dma_clients_notify_removed(struct dma_chan *chan)
246{ 262{
247 struct dma_client *client; 263 struct dma_client *client;
264 enum dma_state_client ack;
248 265
249 client = kzalloc(sizeof(*client), GFP_KERNEL); 266 mutex_lock(&dma_list_mutex);
250 if (!client)
251 return NULL;
252 267
253 INIT_LIST_HEAD(&client->channels); 268 list_for_each_entry(client, &dma_client_list, global_node) {
254 spin_lock_init(&client->lock); 269 ack = client->event_callback(client, chan,
255 client->chans_desired = 0; 270 DMA_RESOURCE_REMOVED);
256 client->chan_count = 0; 271
257 client->event_callback = event_callback; 272 /* client was holding resources for this channel so
273 * free it
274 */
275 if (ack == DMA_ACK) {
276 dma_chan_put(chan);
277 kref_put(&chan->device->refcount,
278 dma_async_device_cleanup);
279 }
280 }
258 281
282 mutex_unlock(&dma_list_mutex);
283}
284
285/**
286 * dma_async_client_register - register a &dma_client
287 * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
288 */
289void dma_async_client_register(struct dma_client *client)
290{
259 mutex_lock(&dma_list_mutex); 291 mutex_lock(&dma_list_mutex);
260 list_add_tail(&client->global_node, &dma_client_list); 292 list_add_tail(&client->global_node, &dma_client_list);
261 mutex_unlock(&dma_list_mutex); 293 mutex_unlock(&dma_list_mutex);
262
263 return client;
264} 294}
265EXPORT_SYMBOL(dma_async_client_register); 295EXPORT_SYMBOL(dma_async_client_register);
266 296
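
Under the reworked model a client no longer asks for a channel count; it declares a capability mask up front and answers per-channel offers from its event callback. A sketch of the registration pattern, assuming the struct dma_client, dma_event_callback and dma_cap_set() definitions this series adds to dmaengine.h; the my_* names are placeholders, and DMA_DUP is taken to be the "no change in interest" answer defined alongside DMA_ACK and DMA_NAK.

#include <linux/module.h>
#include <linux/dmaengine.h>

static enum dma_state_client
my_dma_event(struct dma_client *client, struct dma_chan *chan,
	     enum dma_state state)
{
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		/* record 'chan' for later use; DMA_ACK takes the reference */
		return DMA_ACK;
	case DMA_RESOURCE_REMOVED:
		/* forget 'chan'; DMA_ACK releases our reference */
		return DMA_ACK;
	default:
		return DMA_DUP;
	}
}

static struct dma_client my_dma_client = {
	.event_callback = my_dma_event,
};

static int __init my_client_init(void)
{
	dma_cap_set(DMA_MEMCPY, my_dma_client.cap_mask);
	dma_async_client_register(&my_dma_client);
	dma_async_client_chan_request(&my_dma_client);
	return 0;
}
module_init(my_client_init);
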
@@ -272,40 +302,42 @@ EXPORT_SYMBOL(dma_async_client_register);
272 */ 302 */
273void dma_async_client_unregister(struct dma_client *client) 303void dma_async_client_unregister(struct dma_client *client)
274{ 304{
305 struct dma_device *device;
275 struct dma_chan *chan; 306 struct dma_chan *chan;
307 enum dma_state_client ack;
276 308
277 if (!client) 309 if (!client)
278 return; 310 return;
279 311
280 rcu_read_lock();
281 list_for_each_entry_rcu(chan, &client->channels, client_node)
282 dma_client_chan_free(chan);
283 rcu_read_unlock();
284
285 mutex_lock(&dma_list_mutex); 312 mutex_lock(&dma_list_mutex);
313 /* free all channels the client is holding */
314 list_for_each_entry(device, &dma_device_list, global_node)
315 list_for_each_entry(chan, &device->channels, device_node) {
316 ack = client->event_callback(client, chan,
317 DMA_RESOURCE_REMOVED);
318
319 if (ack == DMA_ACK) {
320 dma_chan_put(chan);
321 kref_put(&chan->device->refcount,
322 dma_async_device_cleanup);
323 }
324 }
325
286 list_del(&client->global_node); 326 list_del(&client->global_node);
287 mutex_unlock(&dma_list_mutex); 327 mutex_unlock(&dma_list_mutex);
288
289 kfree(client);
290 dma_chans_rebalance();
291} 328}
292EXPORT_SYMBOL(dma_async_client_unregister); 329EXPORT_SYMBOL(dma_async_client_unregister);
293 330
294/** 331/**
295 * dma_async_client_chan_request - request DMA channels 332 * dma_async_client_chan_request - send all available channels to the
296 * @client: &dma_client 333 * client that satisfy the capability mask
297 * @number: count of DMA channels requested 334 * @client - requester
298 *
299 * Clients call dma_async_client_chan_request() to specify how many
300 * DMA channels they need, 0 to free all currently allocated.
301 * The resulting allocations/frees are indicated to the client via the
302 * event callback.
303 */ 335 */
304void dma_async_client_chan_request(struct dma_client *client, 336void dma_async_client_chan_request(struct dma_client *client)
305 unsigned int number)
306{ 337{
307 client->chans_desired = number; 338 mutex_lock(&dma_list_mutex);
308 dma_chans_rebalance(); 339 dma_client_chan_alloc(client);
340 mutex_unlock(&dma_list_mutex);
309} 341}
310EXPORT_SYMBOL(dma_async_client_chan_request); 342EXPORT_SYMBOL(dma_async_client_chan_request);
311 343
@@ -316,12 +348,31 @@ EXPORT_SYMBOL(dma_async_client_chan_request);
316int dma_async_device_register(struct dma_device *device) 348int dma_async_device_register(struct dma_device *device)
317{ 349{
318 static int id; 350 static int id;
319 int chancnt = 0; 351 int chancnt = 0, rc;
320 struct dma_chan* chan; 352 struct dma_chan* chan;
321 353
322 if (!device) 354 if (!device)
323 return -ENODEV; 355 return -ENODEV;
324 356
357 /* validate device routines */
358 BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
359 !device->device_prep_dma_memcpy);
360 BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
361 !device->device_prep_dma_xor);
362 BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
363 !device->device_prep_dma_zero_sum);
364 BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
365 !device->device_prep_dma_memset);
366 BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
367 !device->device_prep_dma_interrupt);
368
369 BUG_ON(!device->device_alloc_chan_resources);
370 BUG_ON(!device->device_free_chan_resources);
371 BUG_ON(!device->device_dependency_added);
372 BUG_ON(!device->device_is_tx_complete);
373 BUG_ON(!device->device_issue_pending);
374 BUG_ON(!device->dev);
375
325 init_completion(&device->done); 376 init_completion(&device->done);
326 kref_init(&device->refcount); 377 kref_init(&device->refcount);
327 device->dev_id = id++; 378 device->dev_id = id++;
@@ -338,17 +389,38 @@ int dma_async_device_register(struct dma_device *device)
338 snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d", 389 snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d",
339 device->dev_id, chan->chan_id); 390 device->dev_id, chan->chan_id);
340 391
392 rc = class_device_register(&chan->class_dev);
393 if (rc) {
394 chancnt--;
395 free_percpu(chan->local);
396 chan->local = NULL;
397 goto err_out;
398 }
399
341 kref_get(&device->refcount); 400 kref_get(&device->refcount);
342 class_device_register(&chan->class_dev); 401 kref_init(&chan->refcount);
402 chan->slow_ref = 0;
403 INIT_RCU_HEAD(&chan->rcu);
343 } 404 }
344 405
345 mutex_lock(&dma_list_mutex); 406 mutex_lock(&dma_list_mutex);
346 list_add_tail(&device->global_node, &dma_device_list); 407 list_add_tail(&device->global_node, &dma_device_list);
347 mutex_unlock(&dma_list_mutex); 408 mutex_unlock(&dma_list_mutex);
348 409
349 dma_chans_rebalance(); 410 dma_clients_notify_available();
350 411
351 return 0; 412 return 0;
413
414err_out:
415 list_for_each_entry(chan, &device->channels, device_node) {
416 if (chan->local == NULL)
417 continue;
418 kref_put(&device->refcount, dma_async_device_cleanup);
419 class_device_unregister(&chan->class_dev);
420 chancnt--;
421 free_percpu(chan->local);
422 }
423 return rc;
352} 424}
353EXPORT_SYMBOL(dma_async_device_register); 425EXPORT_SYMBOL(dma_async_device_register);
354 426
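
The validation added above spells out the driver contract: every capability bit advertised in cap_mask must be paired with its prep routine, the common callbacks must be populated, and dev must point at the backing struct device. A hedged sketch of a driver meeting that contract before registering; the my_* callbacks are hypothetical, with prototypes inferred from the ioatdma assignments later in this patch.

#include <linux/dmaengine.h>

/* hypothetical driver callbacks, declared only to show their shapes */
int my_alloc_chan_resources(struct dma_chan *chan);
void my_free_chan_resources(struct dma_chan *chan);
struct dma_async_tx_descriptor *my_prep_memcpy(struct dma_chan *chan,
					       size_t len, int int_en);
enum dma_status my_is_complete(struct dma_chan *chan, dma_cookie_t cookie,
			       dma_cookie_t *last, dma_cookie_t *used);
void my_issue_pending(struct dma_chan *chan);
void my_dependency_added(struct dma_chan *chan);

int my_register_engine(struct dma_device *dma, struct device *dev)
{
	INIT_LIST_HEAD(&dma->channels);
	/* per-channel setup (allocating each struct dma_chan and adding
	 * it to dma->channels) is omitted from this sketch
	 */

	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
	dma->dev = dev;
	dma->device_alloc_chan_resources = my_alloc_chan_resources;
	dma->device_free_chan_resources = my_free_chan_resources;
	dma->device_prep_dma_memcpy = my_prep_memcpy;
	dma->device_is_tx_complete = my_is_complete;
	dma->device_issue_pending = my_issue_pending;
	dma->device_dependency_added = my_dependency_added;

	return dma_async_device_register(dma);
}
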
@@ -371,32 +443,165 @@ static void dma_async_device_cleanup(struct kref *kref)
371void dma_async_device_unregister(struct dma_device *device) 443void dma_async_device_unregister(struct dma_device *device)
372{ 444{
373 struct dma_chan *chan; 445 struct dma_chan *chan;
374 unsigned long flags;
375 446
376 mutex_lock(&dma_list_mutex); 447 mutex_lock(&dma_list_mutex);
377 list_del(&device->global_node); 448 list_del(&device->global_node);
378 mutex_unlock(&dma_list_mutex); 449 mutex_unlock(&dma_list_mutex);
379 450
380 list_for_each_entry(chan, &device->channels, device_node) { 451 list_for_each_entry(chan, &device->channels, device_node) {
381 if (chan->client) { 452 dma_clients_notify_removed(chan);
382 spin_lock_irqsave(&chan->client->lock, flags);
383 list_del(&chan->client_node);
384 chan->client->chan_count--;
385 spin_unlock_irqrestore(&chan->client->lock, flags);
386 chan->client->event_callback(chan->client,
387 chan,
388 DMA_RESOURCE_REMOVED);
389 dma_client_chan_free(chan);
390 }
391 class_device_unregister(&chan->class_dev); 453 class_device_unregister(&chan->class_dev);
454 dma_chan_release(chan);
392 } 455 }
393 dma_chans_rebalance();
394 456
395 kref_put(&device->refcount, dma_async_device_cleanup); 457 kref_put(&device->refcount, dma_async_device_cleanup);
396 wait_for_completion(&device->done); 458 wait_for_completion(&device->done);
397} 459}
398EXPORT_SYMBOL(dma_async_device_unregister); 460EXPORT_SYMBOL(dma_async_device_unregister);
399 461
462/**
463 * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
464 * @chan: DMA channel to offload copy to
465 * @dest: destination address (virtual)
466 * @src: source address (virtual)
467 * @len: length
468 *
469 * Both @dest and @src must be mappable to a bus address according to the
470 * DMA mapping API rules for streaming mappings.
471 * Both @dest and @src must stay memory resident (kernel memory or locked
472 * user space pages).
473 */
474dma_cookie_t
475dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
476 void *src, size_t len)
477{
478 struct dma_device *dev = chan->device;
479 struct dma_async_tx_descriptor *tx;
480 dma_addr_t addr;
481 dma_cookie_t cookie;
482 int cpu;
483
484 tx = dev->device_prep_dma_memcpy(chan, len, 0);
485 if (!tx)
486 return -ENOMEM;
487
488 tx->ack = 1;
489 tx->callback = NULL;
490 addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
491 tx->tx_set_src(addr, tx, 0);
492 addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
493 tx->tx_set_dest(addr, tx, 0);
494 cookie = tx->tx_submit(tx);
495
496 cpu = get_cpu();
497 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
498 per_cpu_ptr(chan->local, cpu)->memcpy_count++;
499 put_cpu();
500
501 return cookie;
502}
503EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
504
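
A caller-side sketch combining the copy wrapper above with the dma_sync_wait() helper introduced earlier in this patch. The channel would normally arrive via the client event callback; the function name offload_copy and the blocking wait are illustrative.

#include <linux/errno.h>
#include <linux/dmaengine.h>

/* Copy 'len' bytes from src to dest over 'chan' and wait for completion. */
static int offload_copy(struct dma_chan *chan, void *dest, void *src,
			size_t len)
{
	dma_cookie_t cookie;

	cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
	if (cookie < 0)
		return cookie;	/* no descriptor was available */

	if (dma_sync_wait(chan, cookie) != DMA_SUCCESS)
		return -EIO;

	return 0;
}
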
505/**
506 * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
507 * @chan: DMA channel to offload copy to
508 * @page: destination page
509 * @offset: offset in page to copy to
510 * @kdata: source address (virtual)
511 * @len: length
512 *
513 * Both @page/@offset and @kdata must be mappable to a bus address according
514 * to the DMA mapping API rules for streaming mappings.
515 * Both @page/@offset and @kdata must stay memory resident (kernel memory or
516 * locked user space pages)
517 */
518dma_cookie_t
519dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
520 unsigned int offset, void *kdata, size_t len)
521{
522 struct dma_device *dev = chan->device;
523 struct dma_async_tx_descriptor *tx;
524 dma_addr_t addr;
525 dma_cookie_t cookie;
526 int cpu;
527
528 tx = dev->device_prep_dma_memcpy(chan, len, 0);
529 if (!tx)
530 return -ENOMEM;
531
532 tx->ack = 1;
533 tx->callback = NULL;
534 addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
535 tx->tx_set_src(addr, tx, 0);
536 addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
537 tx->tx_set_dest(addr, tx, 0);
538 cookie = tx->tx_submit(tx);
539
540 cpu = get_cpu();
541 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
542 per_cpu_ptr(chan->local, cpu)->memcpy_count++;
543 put_cpu();
544
545 return cookie;
546}
547EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
548
549/**
550 * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
551 * @chan: DMA channel to offload copy to
552 * @dest_pg: destination page
553 * @dest_off: offset in page to copy to
554 * @src_pg: source page
555 * @src_off: offset in page to copy from
556 * @len: length
557 *
558 * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
559 * address according to the DMA mapping API rules for streaming mappings.
560 * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
561 * (kernel memory or locked user space pages).
562 */
563dma_cookie_t
564dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
565 unsigned int dest_off, struct page *src_pg, unsigned int src_off,
566 size_t len)
567{
568 struct dma_device *dev = chan->device;
569 struct dma_async_tx_descriptor *tx;
570 dma_addr_t addr;
571 dma_cookie_t cookie;
572 int cpu;
573
574 tx = dev->device_prep_dma_memcpy(chan, len, 0);
575 if (!tx)
576 return -ENOMEM;
577
578 tx->ack = 1;
579 tx->callback = NULL;
580 addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
581 tx->tx_set_src(addr, tx, 0);
582 addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
583 tx->tx_set_dest(addr, tx, 0);
584 cookie = tx->tx_submit(tx);
585
586 cpu = get_cpu();
587 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
588 per_cpu_ptr(chan->local, cpu)->memcpy_count++;
589 put_cpu();
590
591 return cookie;
592}
593EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
594
595void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
596 struct dma_chan *chan)
597{
598 tx->chan = chan;
599 spin_lock_init(&tx->lock);
600 INIT_LIST_HEAD(&tx->depend_node);
601 INIT_LIST_HEAD(&tx->depend_list);
602}
603EXPORT_SYMBOL(dma_async_tx_descriptor_init);
604
400static int __init dma_bus_init(void) 605static int __init dma_bus_init(void)
401{ 606{
402 mutex_init(&dma_list_mutex); 607 mutex_init(&dma_list_mutex);
diff --git a/drivers/dma/ioatdma.c b/drivers/dma/ioatdma.c
index 850014139556..5fbe56b5cea0 100644
--- a/drivers/dma/ioatdma.c
+++ b/drivers/dma/ioatdma.c
@@ -32,16 +32,17 @@
32#include <linux/delay.h> 32#include <linux/delay.h>
33#include <linux/dma-mapping.h> 33#include <linux/dma-mapping.h>
34#include "ioatdma.h" 34#include "ioatdma.h"
35#include "ioatdma_io.h"
36#include "ioatdma_registers.h" 35#include "ioatdma_registers.h"
37#include "ioatdma_hw.h" 36#include "ioatdma_hw.h"
38 37
39#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) 38#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
40#define to_ioat_device(dev) container_of(dev, struct ioat_device, common) 39#define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
41#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) 40#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
41#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
42 42
43/* internal functions */ 43/* internal functions */
44static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent); 44static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
45static void ioat_shutdown(struct pci_dev *pdev);
45static void __devexit ioat_remove(struct pci_dev *pdev); 46static void __devexit ioat_remove(struct pci_dev *pdev);
46 47
47static int enumerate_dma_channels(struct ioat_device *device) 48static int enumerate_dma_channels(struct ioat_device *device)
@@ -51,8 +52,8 @@ static int enumerate_dma_channels(struct ioat_device *device)
51 int i; 52 int i;
52 struct ioat_dma_chan *ioat_chan; 53 struct ioat_dma_chan *ioat_chan;
53 54
54 device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET); 55 device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
55 xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET); 56 xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
56 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); 57 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
57 58
58 for (i = 0; i < device->common.chancnt; i++) { 59 for (i = 0; i < device->common.chancnt; i++) {
@@ -71,13 +72,79 @@ static int enumerate_dma_channels(struct ioat_device *device)
71 INIT_LIST_HEAD(&ioat_chan->used_desc); 72 INIT_LIST_HEAD(&ioat_chan->used_desc);
72 /* This should be made common somewhere in dmaengine.c */ 73 /* This should be made common somewhere in dmaengine.c */
73 ioat_chan->common.device = &device->common; 74 ioat_chan->common.device = &device->common;
74 ioat_chan->common.client = NULL;
75 list_add_tail(&ioat_chan->common.device_node, 75 list_add_tail(&ioat_chan->common.device_node,
76 &device->common.channels); 76 &device->common.channels);
77 } 77 }
78 return device->common.chancnt; 78 return device->common.chancnt;
79} 79}
80 80
81static void
82ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
83{
84 struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
85 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
86
87 pci_unmap_addr_set(desc, src, addr);
88
89 list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
90 iter->hw->src_addr = addr;
91 addr += ioat_chan->xfercap;
92 }
93
94}
95
96static void
97ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
98{
99 struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
100 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
101
102 pci_unmap_addr_set(desc, dst, addr);
103
104 list_for_each_entry(iter, &desc->async_tx.tx_list, node) {
105 iter->hw->dst_addr = addr;
106 addr += ioat_chan->xfercap;
107 }
108}
109
110static dma_cookie_t
111ioat_tx_submit(struct dma_async_tx_descriptor *tx)
112{
113 struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
114 struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
115 int append = 0;
116 dma_cookie_t cookie;
117 struct ioat_desc_sw *group_start;
118
119 group_start = list_entry(desc->async_tx.tx_list.next,
120 struct ioat_desc_sw, node);
121 spin_lock_bh(&ioat_chan->desc_lock);
122 /* cookie incr and addition to used_list must be atomic */
123 cookie = ioat_chan->common.cookie;
124 cookie++;
125 if (cookie < 0)
126 cookie = 1;
127 ioat_chan->common.cookie = desc->async_tx.cookie = cookie;
128
129 /* write address into NextDescriptor field of last desc in chain */
130 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
131 group_start->async_tx.phys;
132 list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev);
133
134 ioat_chan->pending += desc->tx_cnt;
135 if (ioat_chan->pending >= 4) {
136 append = 1;
137 ioat_chan->pending = 0;
138 }
139 spin_unlock_bh(&ioat_chan->desc_lock);
140
141 if (append)
142 writeb(IOAT_CHANCMD_APPEND,
143 ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
144
145 return cookie;
146}
147
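
The cookie handling in ioat_tx_submit() follows the dmaengine convention: cookies are per-channel, strictly increasing, and skip zero and negative values (reserved for "not submitted" and error returns), so completion can be reported by comparing a requested cookie against the channel's last-completed one. The assignment step in isolation, as a sketch (assign_cookie is an illustrative name, and the caller is assumed to hold the channel's descriptor lock as above):

#include <linux/dmaengine.h>

static dma_cookie_t assign_cookie(struct dma_chan *chan,
				  struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie = chan->cookie + 1;

	if (cookie < 0)
		cookie = 1;	/* wrapped: 0 and negatives are reserved */

	chan->cookie = cookie;
	tx->cookie = cookie;
	return cookie;
}
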
81static struct ioat_desc_sw *ioat_dma_alloc_descriptor( 148static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
82 struct ioat_dma_chan *ioat_chan, 149 struct ioat_dma_chan *ioat_chan,
83 gfp_t flags) 150 gfp_t flags)
@@ -99,8 +166,13 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
99 } 166 }
100 167
101 memset(desc, 0, sizeof(*desc)); 168 memset(desc, 0, sizeof(*desc));
169 dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
170 desc_sw->async_tx.tx_set_src = ioat_set_src;
171 desc_sw->async_tx.tx_set_dest = ioat_set_dest;
172 desc_sw->async_tx.tx_submit = ioat_tx_submit;
173 INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
102 desc_sw->hw = desc; 174 desc_sw->hw = desc;
103 desc_sw->phys = phys; 175 desc_sw->async_tx.phys = phys;
104 176
105 return desc_sw; 177 return desc_sw;
106} 178}
@@ -123,7 +195,7 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
123 * In-use bit automatically set by reading chanctrl 195 * In-use bit automatically set by reading chanctrl
124 * If 0, we got it, if 1, someone else did 196 * If 0, we got it, if 1, someone else did
125 */ 197 */
126 chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET); 198 chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
127 if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) 199 if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE)
128 return -EBUSY; 200 return -EBUSY;
129 201
@@ -132,12 +204,12 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
132 IOAT_CHANCTRL_ERR_INT_EN | 204 IOAT_CHANCTRL_ERR_INT_EN |
133 IOAT_CHANCTRL_ANY_ERR_ABORT_EN | 205 IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
134 IOAT_CHANCTRL_ERR_COMPLETION_EN; 206 IOAT_CHANCTRL_ERR_COMPLETION_EN;
135 ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); 207 writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
136 208
137 chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET); 209 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
138 if (chanerr) { 210 if (chanerr) {
139 printk("IOAT: CHANERR = %x, clearing\n", chanerr); 211 printk("IOAT: CHANERR = %x, clearing\n", chanerr);
140 ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr); 212 writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
141 } 213 }
142 214
143 /* Allocate descriptors */ 215 /* Allocate descriptors */
@@ -161,10 +233,10 @@ static int ioat_dma_alloc_chan_resources(struct dma_chan *chan)
161 &ioat_chan->completion_addr); 233 &ioat_chan->completion_addr);
162 memset(ioat_chan->completion_virt, 0, 234 memset(ioat_chan->completion_virt, 0,
163 sizeof(*ioat_chan->completion_virt)); 235 sizeof(*ioat_chan->completion_virt));
164 ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW, 236 writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
165 ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF); 237 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
166 ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH, 238 writel(((u64) ioat_chan->completion_addr) >> 32,
167 ((u64) ioat_chan->completion_addr) >> 32); 239 ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
168 240
169 ioat_start_null_desc(ioat_chan); 241 ioat_start_null_desc(ioat_chan);
170 return i; 242 return i;
@@ -182,18 +254,20 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
182 254
183 ioat_dma_memcpy_cleanup(ioat_chan); 255 ioat_dma_memcpy_cleanup(ioat_chan);
184 256
185 ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET); 257 writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
186 258
187 spin_lock_bh(&ioat_chan->desc_lock); 259 spin_lock_bh(&ioat_chan->desc_lock);
188 list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { 260 list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) {
189 in_use_descs++; 261 in_use_descs++;
190 list_del(&desc->node); 262 list_del(&desc->node);
191 pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys); 263 pci_pool_free(ioat_device->dma_pool, desc->hw,
264 desc->async_tx.phys);
192 kfree(desc); 265 kfree(desc);
193 } 266 }
194 list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) { 267 list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) {
195 list_del(&desc->node); 268 list_del(&desc->node);
196 pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys); 269 pci_pool_free(ioat_device->dma_pool, desc->hw,
270 desc->async_tx.phys);
197 kfree(desc); 271 kfree(desc);
198 } 272 }
199 spin_unlock_bh(&ioat_chan->desc_lock); 273 spin_unlock_bh(&ioat_chan->desc_lock);
@@ -210,50 +284,30 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan)
210 ioat_chan->last_completion = ioat_chan->completion_addr = 0; 284 ioat_chan->last_completion = ioat_chan->completion_addr = 0;
211 285
212 /* Tell hw the chan is free */ 286 /* Tell hw the chan is free */
213 chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET); 287 chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
214 chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE; 288 chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE;
215 ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); 289 writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
216} 290}
217 291
218/** 292static struct dma_async_tx_descriptor *
219 * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction 293ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
220 * @ioat_chan: IOAT DMA channel handle
221 * @dest: DMA destination address
222 * @src: DMA source address
223 * @len: transaction length in bytes
224 */
225
226static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
227 dma_addr_t dest,
228 dma_addr_t src,
229 size_t len)
230{ 294{
231 struct ioat_desc_sw *first; 295 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
232 struct ioat_desc_sw *prev; 296 struct ioat_desc_sw *first, *prev, *new;
233 struct ioat_desc_sw *new;
234 dma_cookie_t cookie;
235 LIST_HEAD(new_chain); 297 LIST_HEAD(new_chain);
236 u32 copy; 298 u32 copy;
237 size_t orig_len; 299 size_t orig_len;
238 dma_addr_t orig_src, orig_dst; 300 int desc_count = 0;
239 unsigned int desc_count = 0;
240 unsigned int append = 0;
241
242 if (!ioat_chan || !dest || !src)
243 return -EFAULT;
244 301
245 if (!len) 302 if (!len)
246 return ioat_chan->common.cookie; 303 return NULL;
247 304
248 orig_len = len; 305 orig_len = len;
249 orig_src = src;
250 orig_dst = dest;
251 306
252 first = NULL; 307 first = NULL;
253 prev = NULL; 308 prev = NULL;
254 309
255 spin_lock_bh(&ioat_chan->desc_lock); 310 spin_lock_bh(&ioat_chan->desc_lock);
256
257 while (len) { 311 while (len) {
258 if (!list_empty(&ioat_chan->free_desc)) { 312 if (!list_empty(&ioat_chan->free_desc)) {
259 new = to_ioat_desc(ioat_chan->free_desc.next); 313 new = to_ioat_desc(ioat_chan->free_desc.next);
@@ -270,141 +324,36 @@ static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
270 324
271 new->hw->size = copy; 325 new->hw->size = copy;
272 new->hw->ctl = 0; 326 new->hw->ctl = 0;
273 new->hw->src_addr = src; 327 new->async_tx.cookie = 0;
274 new->hw->dst_addr = dest; 328 new->async_tx.ack = 1;
275 new->cookie = 0;
276 329
277 /* chain together the physical address list for the HW */ 330 /* chain together the physical address list for the HW */
278 if (!first) 331 if (!first)
279 first = new; 332 first = new;
280 else 333 else
281 prev->hw->next = (u64) new->phys; 334 prev->hw->next = (u64) new->async_tx.phys;
282 335
283 prev = new; 336 prev = new;
284
285 len -= copy; 337 len -= copy;
286 dest += copy;
287 src += copy;
288
289 list_add_tail(&new->node, &new_chain); 338 list_add_tail(&new->node, &new_chain);
290 desc_count++; 339 desc_count++;
291 } 340 }
292 new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
293 new->hw->next = 0;
294 341
295 /* cookie incr and addition to used_list must be atomic */ 342 list_splice(&new_chain, &new->async_tx.tx_list);
296 343
297 cookie = ioat_chan->common.cookie; 344 new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
298 cookie++; 345 new->hw->next = 0;
299 if (cookie < 0) 346 new->tx_cnt = desc_count;
300 cookie = 1; 347 new->async_tx.ack = 0; /* client is in control of this ack */
301 ioat_chan->common.cookie = new->cookie = cookie; 348 new->async_tx.cookie = -EBUSY;
302 349
303 pci_unmap_addr_set(new, src, orig_src);
304 pci_unmap_addr_set(new, dst, orig_dst);
305 pci_unmap_len_set(new, src_len, orig_len); 350 pci_unmap_len_set(new, src_len, orig_len);
306 pci_unmap_len_set(new, dst_len, orig_len); 351 pci_unmap_len_set(new, dst_len, orig_len);
307
308 /* write address into NextDescriptor field of last desc in chain */
309 to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys;
310 list_splice_init(&new_chain, ioat_chan->used_desc.prev);
311
312 ioat_chan->pending += desc_count;
313 if (ioat_chan->pending >= 20) {
314 append = 1;
315 ioat_chan->pending = 0;
316 }
317
318 spin_unlock_bh(&ioat_chan->desc_lock); 352 spin_unlock_bh(&ioat_chan->desc_lock);
319 353
320 if (append) 354 return new ? &new->async_tx : NULL;
321 ioatdma_chan_write8(ioat_chan,
322 IOAT_CHANCMD_OFFSET,
323 IOAT_CHANCMD_APPEND);
324 return cookie;
325}
326
327/**
328 * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs
329 * @chan: IOAT DMA channel handle
330 * @dest: DMA destination address
331 * @src: DMA source address
332 * @len: transaction length in bytes
333 */
334
335static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan,
336 void *dest,
337 void *src,
338 size_t len)
339{
340 dma_addr_t dest_addr;
341 dma_addr_t src_addr;
342 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
343
344 dest_addr = pci_map_single(ioat_chan->device->pdev,
345 dest, len, PCI_DMA_FROMDEVICE);
346 src_addr = pci_map_single(ioat_chan->device->pdev,
347 src, len, PCI_DMA_TODEVICE);
348
349 return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
350} 355}
351 356
352/**
353 * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page
354 * @chan: IOAT DMA channel handle
355 * @page: pointer to the page to copy to
356 * @offset: offset into that page
357 * @src: DMA source address
358 * @len: transaction length in bytes
359 */
360
361static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
362 struct page *page,
363 unsigned int offset,
364 void *src,
365 size_t len)
366{
367 dma_addr_t dest_addr;
368 dma_addr_t src_addr;
369 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
370
371 dest_addr = pci_map_page(ioat_chan->device->pdev,
372 page, offset, len, PCI_DMA_FROMDEVICE);
373 src_addr = pci_map_single(ioat_chan->device->pdev,
374 src, len, PCI_DMA_TODEVICE);
375
376 return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
377}
378
379/**
380 * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages
381 * @chan: IOAT DMA channel handle
382 * @dest_pg: pointer to the page to copy to
383 * @dest_off: offset into that page
384 * @src_pg: pointer to the page to copy from
385 * @src_off: offset into that page
386 * @len: transaction length in bytes. This is guaranteed not to make a copy
387 * across a page boundary.
388 */
389
390static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
391 struct page *dest_pg,
392 unsigned int dest_off,
393 struct page *src_pg,
394 unsigned int src_off,
395 size_t len)
396{
397 dma_addr_t dest_addr;
398 dma_addr_t src_addr;
399 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
400
401 dest_addr = pci_map_page(ioat_chan->device->pdev,
402 dest_pg, dest_off, len, PCI_DMA_FROMDEVICE);
403 src_addr = pci_map_page(ioat_chan->device->pdev,
404 src_pg, src_off, len, PCI_DMA_TODEVICE);
405
406 return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
407}
408 357
409/** 358/**
410 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw 359 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
@@ -417,9 +366,8 @@ static void ioat_dma_memcpy_issue_pending(struct dma_chan *chan)
417 366
418 if (ioat_chan->pending != 0) { 367 if (ioat_chan->pending != 0) {
419 ioat_chan->pending = 0; 368 ioat_chan->pending = 0;
420 ioatdma_chan_write8(ioat_chan, 369 writeb(IOAT_CHANCMD_APPEND,
421 IOAT_CHANCMD_OFFSET, 370 ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
422 IOAT_CHANCMD_APPEND);
423 } 371 }
424} 372}
425 373
@@ -449,7 +397,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
449 if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == 397 if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
450 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { 398 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
451 printk("IOAT: Channel halted, chanerr = %x\n", 399 printk("IOAT: Channel halted, chanerr = %x\n",
452 ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET)); 400 readl(chan->reg_base + IOAT_CHANERR_OFFSET));
453 401
454 /* TODO do something to salvage the situation */ 402 /* TODO do something to salvage the situation */
455 } 403 }
@@ -467,8 +415,8 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
467 * exceeding xfercap, perhaps. If so, only the last one will 415 * exceeding xfercap, perhaps. If so, only the last one will
468 * have a cookie, and require unmapping. 416 * have a cookie, and require unmapping.
469 */ 417 */
470 if (desc->cookie) { 418 if (desc->async_tx.cookie) {
471 cookie = desc->cookie; 419 cookie = desc->async_tx.cookie;
472 420
473 /* yes we are unmapping both _page and _single alloc'd 421 /* yes we are unmapping both _page and _single alloc'd
474 regions with unmap_page. Is this *really* that bad? 422 regions with unmap_page. Is this *really* that bad?
@@ -483,14 +431,19 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
483 PCI_DMA_TODEVICE); 431 PCI_DMA_TODEVICE);
484 } 432 }
485 433
486 if (desc->phys != phys_complete) { 434 if (desc->async_tx.phys != phys_complete) {
487 /* a completed entry, but not the last, so cleanup */ 435 /* a completed entry, but not the last, so cleanup
488 list_del(&desc->node); 436 * if the client is done with the descriptor
489 list_add_tail(&desc->node, &chan->free_desc); 437 */
438 if (desc->async_tx.ack) {
439 list_del(&desc->node);
440 list_add_tail(&desc->node, &chan->free_desc);
441 } else
442 desc->async_tx.cookie = 0;
490 } else { 443 } else {
491 /* last used desc. Do not remove, so we can append from 444 /* last used desc. Do not remove, so we can append from
492 it, but don't look at it next time, either */ 445 it, but don't look at it next time, either */
493 desc->cookie = 0; 446 desc->async_tx.cookie = 0;
494 447
495 /* TODO check status bits? */ 448 /* TODO check status bits? */
496 break; 449 break;
@@ -506,6 +459,17 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *chan)
506 spin_unlock(&chan->cleanup_lock); 459 spin_unlock(&chan->cleanup_lock);
507} 460}
508 461
462static void ioat_dma_dependency_added(struct dma_chan *chan)
463{
464 struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
465 spin_lock_bh(&ioat_chan->desc_lock);
466 if (ioat_chan->pending == 0) {
467 spin_unlock_bh(&ioat_chan->desc_lock);
468 ioat_dma_memcpy_cleanup(ioat_chan);
469 } else
470 spin_unlock_bh(&ioat_chan->desc_lock);
471}
472
509/** 473/**
510 * ioat_dma_is_complete - poll the status of a IOAT DMA transaction 474 * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
511 * @chan: IOAT DMA channel handle 475 * @chan: IOAT DMA channel handle
@@ -553,6 +517,8 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
553 517
554static struct pci_device_id ioat_pci_tbl[] = { 518static struct pci_device_id ioat_pci_tbl[] = {
555 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, 519 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
520 { PCI_DEVICE(PCI_VENDOR_ID_UNISYS,
521 PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
556 { 0, } 522 { 0, }
557}; 523};
558 524
@@ -560,6 +526,7 @@ static struct pci_driver ioat_pci_driver = {
560 .name = "ioatdma", 526 .name = "ioatdma",
561 .id_table = ioat_pci_tbl, 527 .id_table = ioat_pci_tbl,
562 .probe = ioat_probe, 528 .probe = ioat_probe,
529 .shutdown = ioat_shutdown,
563 .remove = __devexit_p(ioat_remove), 530 .remove = __devexit_p(ioat_remove),
564}; 531};
565 532
@@ -569,21 +536,21 @@ static irqreturn_t ioat_do_interrupt(int irq, void *data)
569 unsigned long attnstatus; 536 unsigned long attnstatus;
570 u8 intrctrl; 537 u8 intrctrl;
571 538
572 intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET); 539 intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
573 540
574 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) 541 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
575 return IRQ_NONE; 542 return IRQ_NONE;
576 543
577 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { 544 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
578 ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); 545 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
579 return IRQ_NONE; 546 return IRQ_NONE;
580 } 547 }
581 548
582 attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET); 549 attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
583 550
584 printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus); 551 printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus);
585 552
586 ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); 553 writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
587 return IRQ_HANDLED; 554 return IRQ_HANDLED;
588} 555}
589 556
@@ -607,19 +574,17 @@ static void ioat_start_null_desc(struct ioat_dma_chan *ioat_chan)
607 574
608 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; 575 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
609 desc->hw->next = 0; 576 desc->hw->next = 0;
577 desc->async_tx.ack = 1;
610 578
611 list_add_tail(&desc->node, &ioat_chan->used_desc); 579 list_add_tail(&desc->node, &ioat_chan->used_desc);
612 spin_unlock_bh(&ioat_chan->desc_lock); 580 spin_unlock_bh(&ioat_chan->desc_lock);
613 581
614#if (BITS_PER_LONG == 64) 582 writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
615 ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys); 583 ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW);
616#else 584 writel(((u64) desc->async_tx.phys) >> 32,
617 ioatdma_chan_write32(ioat_chan, 585 ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH);
618 IOAT_CHAINADDR_OFFSET_LOW, 586
619 (u32) desc->phys); 587 writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET);
620 ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0);
621#endif
622 ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START);
623} 588}
624 589
625/* 590/*
@@ -633,6 +598,8 @@ static int ioat_self_test(struct ioat_device *device)
633 u8 *src; 598 u8 *src;
634 u8 *dest; 599 u8 *dest;
635 struct dma_chan *dma_chan; 600 struct dma_chan *dma_chan;
601 struct dma_async_tx_descriptor *tx;
602 dma_addr_t addr;
636 dma_cookie_t cookie; 603 dma_cookie_t cookie;
637 int err = 0; 604 int err = 0;
638 605
@@ -658,7 +625,15 @@ static int ioat_self_test(struct ioat_device *device)
658 goto out; 625 goto out;
659 } 626 }
660 627
661 cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE); 628 tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
629 async_tx_ack(tx);
630 addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
631 DMA_TO_DEVICE);
632 ioat_set_src(addr, tx, 0);
633 addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
634 DMA_FROM_DEVICE);
635 ioat_set_dest(addr, tx, 0);
636 cookie = ioat_tx_submit(tx);
662 ioat_dma_memcpy_issue_pending(dma_chan); 637 ioat_dma_memcpy_issue_pending(dma_chan);
663 msleep(1); 638 msleep(1);
664 639
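The converted self-test above doubles as a template for the new client-side flow: a prep routine returns a struct dma_async_tx_descriptor, the caller maps its buffers and attaches them through the descriptor's tx_set_src/tx_set_dest methods, then submits and kicks the channel. A hedged sketch of that sequence using the generic hooks this patch installs; the function name is hypothetical and error handling is omitted.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/async_tx.h>

/* sketch only: queue a single memcpy on 'chan' and return its cookie */
static dma_cookie_t sample_issue_memcpy(struct dma_chan *chan,
                                        void *dest, void *src, size_t len)
{
        struct dma_async_tx_descriptor *tx;
        dma_addr_t addr;
        dma_cookie_t cookie;

        tx = chan->device->device_prep_dma_memcpy(chan, len, 0);
        async_tx_ack(tx);       /* no dependent operations will be chained */

        addr = dma_map_single(chan->device->dev, src, len, DMA_TO_DEVICE);
        tx->tx_set_src(addr, tx, 0);
        addr = dma_map_single(chan->device->dev, dest, len, DMA_FROM_DEVICE);
        tx->tx_set_dest(addr, tx, 0);

        cookie = tx->tx_submit(tx);                     /* place it on the channel */
        chan->device->device_issue_pending(chan);       /* kick the hardware */

        return cookie;
}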
@@ -748,19 +723,20 @@ static int __devinit ioat_probe(struct pci_dev *pdev,
748 723
749 device->reg_base = reg_base; 724 device->reg_base = reg_base;
750 725
751 ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN); 726 writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET);
752 pci_set_master(pdev); 727 pci_set_master(pdev);
753 728
754 INIT_LIST_HEAD(&device->common.channels); 729 INIT_LIST_HEAD(&device->common.channels);
755 enumerate_dma_channels(device); 730 enumerate_dma_channels(device);
756 731
732 dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
757 device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; 733 device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
758 device->common.device_free_chan_resources = ioat_dma_free_chan_resources; 734 device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
759 device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; 735 device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
760 device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; 736 device->common.device_is_tx_complete = ioat_dma_is_complete;
761 device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; 737 device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
762 device->common.device_memcpy_complete = ioat_dma_is_complete; 738 device->common.device_dependency_added = ioat_dma_dependency_added;
763 device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; 739 device->common.dev = &pdev->dev;
764 printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n", 740 printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
765 device->common.chancnt); 741 device->common.chancnt);
766 742
@@ -787,9 +763,20 @@ err_request_regions:
787err_set_dma_mask: 763err_set_dma_mask:
788 pci_disable_device(pdev); 764 pci_disable_device(pdev);
789err_enable_device: 765err_enable_device:
766
767 printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n");
768
790 return err; 769 return err;
791} 770}
792 771
772static void ioat_shutdown(struct pci_dev *pdev)
773{
774 struct ioat_device *device;
775 device = pci_get_drvdata(pdev);
776
777 dma_async_device_unregister(&device->common);
778}
779
793static void __devexit ioat_remove(struct pci_dev *pdev) 780static void __devexit ioat_remove(struct pci_dev *pdev)
794{ 781{
795 struct ioat_device *device; 782 struct ioat_device *device;
@@ -818,7 +805,7 @@ static void __devexit ioat_remove(struct pci_dev *pdev)
818} 805}
819 806
820/* MODULE API */ 807/* MODULE API */
821MODULE_VERSION("1.7"); 808MODULE_VERSION("1.9");
822MODULE_LICENSE("GPL"); 809MODULE_LICENSE("GPL");
823MODULE_AUTHOR("Intel Corporation"); 810MODULE_AUTHOR("Intel Corporation");
824 811
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
index 62b26a9be4c9..d3726478031a 100644
--- a/drivers/dma/ioatdma.h
+++ b/drivers/dma/ioatdma.h
@@ -30,9 +30,6 @@
30 30
31#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 31#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
32 32
33extern struct list_head dma_device_list;
34extern struct list_head dma_client_list;
35
36/** 33/**
37 * struct ioat_device - internal representation of a IOAT device 34 * struct ioat_device - internal representation of a IOAT device
38 * @pdev: PCI-Express device 35 * @pdev: PCI-Express device
@@ -105,21 +102,20 @@ struct ioat_dma_chan {
105/** 102/**
106 * struct ioat_desc_sw - wrapper around hardware descriptor 103 * struct ioat_desc_sw - wrapper around hardware descriptor
107 * @hw: hardware DMA descriptor 104 * @hw: hardware DMA descriptor
108 * @node: 105 * @node: this descriptor will either be on the free list,
109 * @cookie: 106 * or attached to a transaction list (async_tx.tx_list)
110 * @phys: 107 * @tx_cnt: number of descriptors required to complete the transaction
108 * @async_tx: the generic software descriptor for all engines
111 */ 109 */
112
113struct ioat_desc_sw { 110struct ioat_desc_sw {
114 struct ioat_dma_descriptor *hw; 111 struct ioat_dma_descriptor *hw;
115 struct list_head node; 112 struct list_head node;
116 dma_cookie_t cookie; 113 int tx_cnt;
117 dma_addr_t phys;
118 DECLARE_PCI_UNMAP_ADDR(src) 114 DECLARE_PCI_UNMAP_ADDR(src)
119 DECLARE_PCI_UNMAP_LEN(src_len) 115 DECLARE_PCI_UNMAP_LEN(src_len)
120 DECLARE_PCI_UNMAP_ADDR(dst) 116 DECLARE_PCI_UNMAP_ADDR(dst)
121 DECLARE_PCI_UNMAP_LEN(dst_len) 117 DECLARE_PCI_UNMAP_LEN(dst_len)
118 struct dma_async_tx_descriptor async_tx;
122}; 119};
123 120
124#endif /* IOATDMA_H */ 121#endif /* IOATDMA_H */
125
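The reworked ioat_desc_sw follows the pattern used by every engine in this merge: the driver-private descriptor embeds a struct dma_async_tx_descriptor, and the submit/cleanup paths recover the wrapper with container_of() (the iop-adma driver below does the same through its tx_to_iop_adma_slot() macro). A minimal sketch with hypothetical names:

#include <linux/list.h>
#include <linux/dmaengine.h>

/* sketch only: 'sample_*' types and names are illustrative */
struct sample_desc_sw {
        struct sample_hw_desc *hw;                      /* hardware descriptor */
        struct list_head node;
        struct dma_async_tx_descriptor async_tx;        /* generic part */
};

#define tx_to_sample_desc(tx) \
        container_of(tx, struct sample_desc_sw, async_tx)

static dma_cookie_t sample_tx_submit(struct dma_async_tx_descriptor *tx)
{
        struct sample_desc_sw *desc = tx_to_sample_desc(tx);

        /* 'desc' is the wrapper that owns 'tx'; a real submit routine
         * would assign a cookie here and append desc->hw to the
         * hardware chain */
        return desc->async_tx.cookie;
}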
diff --git a/drivers/dma/ioatdma_io.h b/drivers/dma/ioatdma_io.h
deleted file mode 100644
index c0b4bf66c920..000000000000
--- a/drivers/dma/ioatdma_io.h
+++ /dev/null
@@ -1,118 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License as published by the Free
6 * Software Foundation; either version 2 of the License, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59
16 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * The full GNU General Public License is included in this distribution in the
19 * file called COPYING.
20 */
21#ifndef IOATDMA_IO_H
22#define IOATDMA_IO_H
23
24#include <asm/io.h>
25
26/*
27 * device and per-channel MMIO register read and write functions
28 * this is a lot of anoying inline functions, but it's typesafe
29 */
30
31static inline u8 ioatdma_read8(struct ioat_device *device,
32 unsigned int offset)
33{
34 return readb(device->reg_base + offset);
35}
36
37static inline u16 ioatdma_read16(struct ioat_device *device,
38 unsigned int offset)
39{
40 return readw(device->reg_base + offset);
41}
42
43static inline u32 ioatdma_read32(struct ioat_device *device,
44 unsigned int offset)
45{
46 return readl(device->reg_base + offset);
47}
48
49static inline void ioatdma_write8(struct ioat_device *device,
50 unsigned int offset, u8 value)
51{
52 writeb(value, device->reg_base + offset);
53}
54
55static inline void ioatdma_write16(struct ioat_device *device,
56 unsigned int offset, u16 value)
57{
58 writew(value, device->reg_base + offset);
59}
60
61static inline void ioatdma_write32(struct ioat_device *device,
62 unsigned int offset, u32 value)
63{
64 writel(value, device->reg_base + offset);
65}
66
67static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan,
68 unsigned int offset)
69{
70 return readb(chan->reg_base + offset);
71}
72
73static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan,
74 unsigned int offset)
75{
76 return readw(chan->reg_base + offset);
77}
78
79static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan,
80 unsigned int offset)
81{
82 return readl(chan->reg_base + offset);
83}
84
85static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan,
86 unsigned int offset, u8 value)
87{
88 writeb(value, chan->reg_base + offset);
89}
90
91static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan,
92 unsigned int offset, u16 value)
93{
94 writew(value, chan->reg_base + offset);
95}
96
97static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan,
98 unsigned int offset, u32 value)
99{
100 writel(value, chan->reg_base + offset);
101}
102
103#if (BITS_PER_LONG == 64)
104static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan,
105 unsigned int offset)
106{
107 return readq(chan->reg_base + offset);
108}
109
110static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan,
111 unsigned int offset, u64 value)
112{
113 writeq(value, chan->reg_base + offset);
114}
115#endif
116
117#endif /* IOATDMA_IO_H */
118
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 000000000000..5a1d426744d6
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1467 @@
1/*
2 * offload engine driver for the Intel Xscale series of i/o processors
3 * Copyright © 2006, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 *
18 */
19
20/*
21 * This driver supports the asynchronous DMA copy and RAID engines available
22 * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
23 */
24
25#include <linux/init.h>
26#include <linux/module.h>
27#include <linux/async_tx.h>
28#include <linux/delay.h>
29#include <linux/dma-mapping.h>
30#include <linux/spinlock.h>
31#include <linux/interrupt.h>
32#include <linux/platform_device.h>
33#include <linux/memory.h>
34#include <linux/ioport.h>
35
36#include <asm/arch/adma.h>
37
38#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
39#define to_iop_adma_device(dev) \
40 container_of(dev, struct iop_adma_device, common)
41#define tx_to_iop_adma_slot(tx) \
42 container_of(tx, struct iop_adma_desc_slot, async_tx)
43
44/**
45 * iop_adma_free_slots - flags descriptor slots for reuse
46 * @slot: Slot to free
47 * Caller must hold &iop_chan->lock while calling this function
48 */
49static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
50{
51 int stride = slot->slots_per_op;
52
53 while (stride--) {
54 slot->slots_per_op = 0;
55 slot = list_entry(slot->slot_node.next,
56 struct iop_adma_desc_slot,
57 slot_node);
58 }
59}
60
61static dma_cookie_t
62iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
63 struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
64{
65 BUG_ON(desc->async_tx.cookie < 0);
66 spin_lock_bh(&desc->async_tx.lock);
67 if (desc->async_tx.cookie > 0) {
68 cookie = desc->async_tx.cookie;
69 desc->async_tx.cookie = 0;
70
71 /* call the callback (must not sleep or submit new
72 * operations to this channel)
73 */
74 if (desc->async_tx.callback)
75 desc->async_tx.callback(
76 desc->async_tx.callback_param);
77
78 /* unmap dma addresses
79 * (unmap_single vs unmap_page?)
80 */
81 if (desc->group_head && desc->unmap_len) {
82 struct iop_adma_desc_slot *unmap = desc->group_head;
83 struct device *dev =
84 &iop_chan->device->pdev->dev;
85 u32 len = unmap->unmap_len;
86 u32 src_cnt = unmap->unmap_src_cnt;
87 dma_addr_t addr = iop_desc_get_dest_addr(unmap,
88 iop_chan);
89
90 dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
91 while (src_cnt--) {
92 addr = iop_desc_get_src_addr(unmap,
93 iop_chan,
94 src_cnt);
95 dma_unmap_page(dev, addr, len,
96 DMA_TO_DEVICE);
97 }
98 desc->group_head = NULL;
99 }
100 }
101
102 /* run dependent operations */
103 async_tx_run_dependencies(&desc->async_tx);
104 spin_unlock_bh(&desc->async_tx.lock);
105
106 return cookie;
107}
108
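The comment above spells out the callback contract: it runs from the cleanup/tasklet path, so it must not sleep and must not submit new operations to the same channel. A hedged sketch, with hypothetical names, of a client arming async_tx.callback to do nothing more than signal a waiter:

#include <linux/dmaengine.h>
#include <linux/completion.h>

/* sketch only: 'sample_*' names are illustrative, not part of this patch */
static void sample_copy_done(void *param)
{
        struct completion *done = param;

        complete(done);         /* safe in atomic context; no sleeping, no new submits */
}

static void sample_arm_callback(struct dma_async_tx_descriptor *tx,
                                struct completion *done)
{
        /* must be set before tx_submit() so the cleanup path sees it */
        tx->callback = sample_copy_done;
        tx->callback_param = done;
}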
109static int
110iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
111 struct iop_adma_chan *iop_chan)
112{
113 /* the client is allowed to attach dependent operations
114 * until 'ack' is set
115 */
116 if (!desc->async_tx.ack)
117 return 0;
118
119 /* leave the last descriptor in the chain
120 * so we can append to it
121 */
122 if (desc->chain_node.next == &iop_chan->chain)
123 return 1;
124
125 dev_dbg(iop_chan->device->common.dev,
126 "\tfree slot: %d slots_per_op: %d\n",
127 desc->idx, desc->slots_per_op);
128
129 list_del(&desc->chain_node);
130 iop_adma_free_slots(desc);
131
132 return 0;
133}
134
135static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
136{
137 struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
138 dma_cookie_t cookie = 0;
139 u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
140 int busy = iop_chan_is_busy(iop_chan);
141 int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
142
143 dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
144 /* free completed slots from the chain starting with
145 * the oldest descriptor
146 */
147 list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
148 chain_node) {
149 pr_debug("\tcookie: %d slot: %d busy: %d "
150 "this_desc: %#x next_desc: %#x ack: %d\n",
151 iter->async_tx.cookie, iter->idx, busy,
152 iter->async_tx.phys, iop_desc_get_next_desc(iter),
153 iter->async_tx.ack);
154 prefetch(_iter);
155 prefetch(&_iter->async_tx);
156
157 /* do not advance past the current descriptor loaded into the
158 * hardware channel, subsequent descriptors are either in
159 * process or have not been submitted
160 */
161 if (seen_current)
162 break;
163
164 /* stop the search if we reach the current descriptor and the
165 * channel is busy, or if it appears that the current descriptor
166 * needs to be re-read (i.e. has been appended to)
167 */
168 if (iter->async_tx.phys == current_desc) {
169 BUG_ON(seen_current++);
170 if (busy || iop_desc_get_next_desc(iter))
171 break;
172 }
173
174 /* detect the start of a group transaction */
175 if (!slot_cnt && !slots_per_op) {
176 slot_cnt = iter->slot_cnt;
177 slots_per_op = iter->slots_per_op;
178 if (slot_cnt <= slots_per_op) {
179 slot_cnt = 0;
180 slots_per_op = 0;
181 }
182 }
183
184 if (slot_cnt) {
185 pr_debug("\tgroup++\n");
186 if (!grp_start)
187 grp_start = iter;
188 slot_cnt -= slots_per_op;
189 }
190
191 /* all the members of a group are complete */
192 if (slots_per_op != 0 && slot_cnt == 0) {
193 struct iop_adma_desc_slot *grp_iter, *_grp_iter;
194 int end_of_chain = 0;
195 pr_debug("\tgroup end\n");
196
197 /* collect the total results */
198 if (grp_start->xor_check_result) {
199 u32 zero_sum_result = 0;
200 slot_cnt = grp_start->slot_cnt;
201 grp_iter = grp_start;
202
203 list_for_each_entry_from(grp_iter,
204 &iop_chan->chain, chain_node) {
205 zero_sum_result |=
206 iop_desc_get_zero_result(grp_iter);
207 pr_debug("\titer%d result: %d\n",
208 grp_iter->idx, zero_sum_result);
209 slot_cnt -= slots_per_op;
210 if (slot_cnt == 0)
211 break;
212 }
213 pr_debug("\tgrp_start->xor_check_result: %p\n",
214 grp_start->xor_check_result);
215 *grp_start->xor_check_result = zero_sum_result;
216 }
217
218 /* clean up the group */
219 slot_cnt = grp_start->slot_cnt;
220 grp_iter = grp_start;
221 list_for_each_entry_safe_from(grp_iter, _grp_iter,
222 &iop_chan->chain, chain_node) {
223 cookie = iop_adma_run_tx_complete_actions(
224 grp_iter, iop_chan, cookie);
225
226 slot_cnt -= slots_per_op;
227 end_of_chain = iop_adma_clean_slot(grp_iter,
228 iop_chan);
229
230 if (slot_cnt == 0 || end_of_chain)
231 break;
232 }
233
234 /* the group should be complete at this point */
235 BUG_ON(slot_cnt);
236
237 slots_per_op = 0;
238 grp_start = NULL;
239 if (end_of_chain)
240 break;
241 else
242 continue;
243 } else if (slots_per_op) /* wait for group completion */
244 continue;
245
246 /* write back zero sum results (single descriptor case) */
247 if (iter->xor_check_result && iter->async_tx.cookie)
248 *iter->xor_check_result =
249 iop_desc_get_zero_result(iter);
250
251 cookie = iop_adma_run_tx_complete_actions(
252 iter, iop_chan, cookie);
253
254 if (iop_adma_clean_slot(iter, iop_chan))
255 break;
256 }
257
258 BUG_ON(!seen_current);
259
260 iop_chan_idle(busy, iop_chan);
261
262 if (cookie > 0) {
263 iop_chan->completed_cookie = cookie;
264 pr_debug("\tcompleted cookie %d\n", cookie);
265 }
266}
267
268static void
269iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
270{
271 spin_lock_bh(&iop_chan->lock);
272 __iop_adma_slot_cleanup(iop_chan);
273 spin_unlock_bh(&iop_chan->lock);
274}
275
276static void iop_adma_tasklet(unsigned long data)
277{
278 struct iop_adma_chan *chan = (struct iop_adma_chan *) data;
279 __iop_adma_slot_cleanup(chan);
280}
281
282static struct iop_adma_desc_slot *
283iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
284 int slots_per_op)
285{
286 struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
287 struct list_head chain = LIST_HEAD_INIT(chain);
288 int slots_found, retry = 0;
289
290 /* start search from the last allocated descriptor
291 * if a contiguous allocation can not be found start searching
292 * from the beginning of the list
293 */
294retry:
295 slots_found = 0;
296 if (retry == 0)
297 iter = iop_chan->last_used;
298 else
299 iter = list_entry(&iop_chan->all_slots,
300 struct iop_adma_desc_slot,
301 slot_node);
302
303 list_for_each_entry_safe_continue(
304 iter, _iter, &iop_chan->all_slots, slot_node) {
305 prefetch(_iter);
306 prefetch(&_iter->async_tx);
307 if (iter->slots_per_op) {
308 /* give up after finding the first busy slot
309 * on the second pass through the list
310 */
311 if (retry)
312 break;
313
314 slots_found = 0;
315 continue;
316 }
317
318 /* start the allocation if the slot is correctly aligned */
319 if (!slots_found++) {
320 if (iop_desc_is_aligned(iter, slots_per_op))
321 alloc_start = iter;
322 else {
323 slots_found = 0;
324 continue;
325 }
326 }
327
328 if (slots_found == num_slots) {
329 struct iop_adma_desc_slot *alloc_tail = NULL;
330 struct iop_adma_desc_slot *last_used = NULL;
331 iter = alloc_start;
332 while (num_slots) {
333 int i;
334 dev_dbg(iop_chan->device->common.dev,
335 "allocated slot: %d "
336 "(desc %p phys: %#x) slots_per_op %d\n",
337 iter->idx, iter->hw_desc,
338 iter->async_tx.phys, slots_per_op);
339
340 /* pre-ack all but the last descriptor */
341 if (num_slots != slots_per_op)
342 iter->async_tx.ack = 1;
343 else
344 iter->async_tx.ack = 0;
345
346 list_add_tail(&iter->chain_node, &chain);
347 alloc_tail = iter;
348 iter->async_tx.cookie = 0;
349 iter->slot_cnt = num_slots;
350 iter->xor_check_result = NULL;
351 for (i = 0; i < slots_per_op; i++) {
352 iter->slots_per_op = slots_per_op - i;
353 last_used = iter;
354 iter = list_entry(iter->slot_node.next,
355 struct iop_adma_desc_slot,
356 slot_node);
357 }
358 num_slots -= slots_per_op;
359 }
360 alloc_tail->group_head = alloc_start;
361 alloc_tail->async_tx.cookie = -EBUSY;
362 list_splice(&chain, &alloc_tail->async_tx.tx_list);
363 iop_chan->last_used = last_used;
364 iop_desc_clear_next_desc(alloc_start);
365 iop_desc_clear_next_desc(alloc_tail);
366 return alloc_tail;
367 }
368 }
369 if (!retry++)
370 goto retry;
371
372 /* try to free some slots if the allocation fails */
373 tasklet_schedule(&iop_chan->irq_tasklet);
374
375 return NULL;
376}
377
378static dma_cookie_t
379iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
380 struct iop_adma_desc_slot *desc)
381{
382 dma_cookie_t cookie = iop_chan->common.cookie;
383 cookie++;
384 if (cookie < 0)
385 cookie = 1;
386 iop_chan->common.cookie = desc->async_tx.cookie = cookie;
387 return cookie;
388}
389
390static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
391{
392 dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
393 iop_chan->pending);
394
395 if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
396 iop_chan->pending = 0;
397 iop_chan_append(iop_chan);
398 }
399}
400
401static dma_cookie_t
402iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
403{
404 struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
405 struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
406 struct iop_adma_desc_slot *grp_start, *old_chain_tail;
407 int slot_cnt;
408 int slots_per_op;
409 dma_cookie_t cookie;
410
411 grp_start = sw_desc->group_head;
412 slot_cnt = grp_start->slot_cnt;
413 slots_per_op = grp_start->slots_per_op;
414
415 spin_lock_bh(&iop_chan->lock);
416 cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
417
418 old_chain_tail = list_entry(iop_chan->chain.prev,
419 struct iop_adma_desc_slot, chain_node);
420 list_splice_init(&sw_desc->async_tx.tx_list,
421 &old_chain_tail->chain_node);
422
423 /* fix up the hardware chain */
424 iop_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
425
426 /* 1/ don't add pre-chained descriptors
427 * 2/ dummy read to flush next_desc write
428 */
429 BUG_ON(iop_desc_get_next_desc(sw_desc));
430
431 /* increment the pending count by the number of slots
432 * memcpy operations have a 1:1 (slot:operation) relation
433 * other operations are heavier and will pop the threshold
434 * more often.
435 */
436 iop_chan->pending += slot_cnt;
437 iop_adma_check_threshold(iop_chan);
438 spin_unlock_bh(&iop_chan->lock);
439
440 dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
441 __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx);
442
443 return cookie;
444}
445
446static void
447iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
448 int index)
449{
450 struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
451 struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
452
453 /* to do: support transfers lengths > IOP_ADMA_MAX_BYTE_COUNT */
454 iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr);
455}
456
457static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
458static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
459
460/* returns the number of allocated descriptors */
461static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
462{
463 char *hw_desc;
464 int idx;
465 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
466 struct iop_adma_desc_slot *slot = NULL;
467 int init = iop_chan->slots_allocated ? 0 : 1;
468 struct iop_adma_platform_data *plat_data =
469 iop_chan->device->pdev->dev.platform_data;
470 int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
471
472 /* Allocate descriptor slots */
473 do {
474 idx = iop_chan->slots_allocated;
475 if (idx == num_descs_in_pool)
476 break;
477
478 slot = kzalloc(sizeof(*slot), GFP_KERNEL);
479 if (!slot) {
480 printk(KERN_INFO "IOP ADMA Channel only initialized"
481 " %d descriptor slots", idx);
482 break;
483 }
484 hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
485 slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
486
487 dma_async_tx_descriptor_init(&slot->async_tx, chan);
488 slot->async_tx.tx_submit = iop_adma_tx_submit;
489 slot->async_tx.tx_set_dest = iop_adma_set_dest;
490 INIT_LIST_HEAD(&slot->chain_node);
491 INIT_LIST_HEAD(&slot->slot_node);
492 INIT_LIST_HEAD(&slot->async_tx.tx_list);
493 hw_desc = (char *) iop_chan->device->dma_desc_pool;
494 slot->async_tx.phys =
495 (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
496 slot->idx = idx;
497
498 spin_lock_bh(&iop_chan->lock);
499 iop_chan->slots_allocated++;
500 list_add_tail(&slot->slot_node, &iop_chan->all_slots);
501 spin_unlock_bh(&iop_chan->lock);
502 } while (iop_chan->slots_allocated < num_descs_in_pool);
503
504 if (idx && !iop_chan->last_used)
505 iop_chan->last_used = list_entry(iop_chan->all_slots.next,
506 struct iop_adma_desc_slot,
507 slot_node);
508
509 dev_dbg(iop_chan->device->common.dev,
510 "allocated %d descriptor slots last_used: %p\n",
511 iop_chan->slots_allocated, iop_chan->last_used);
512
513 /* initialize the channel and the chain with a null operation */
514 if (init) {
515 if (dma_has_cap(DMA_MEMCPY,
516 iop_chan->device->common.cap_mask))
517 iop_chan_start_null_memcpy(iop_chan);
518 else if (dma_has_cap(DMA_XOR,
519 iop_chan->device->common.cap_mask))
520 iop_chan_start_null_xor(iop_chan);
521 else
522 BUG();
523 }
524
525 return (idx > 0) ? idx : -ENOMEM;
526}
527
528static struct dma_async_tx_descriptor *
529iop_adma_prep_dma_interrupt(struct dma_chan *chan)
530{
531 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
532 struct iop_adma_desc_slot *sw_desc, *grp_start;
533 int slot_cnt, slots_per_op;
534
535 dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
536
537 spin_lock_bh(&iop_chan->lock);
538 slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
539 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
540 if (sw_desc) {
541 grp_start = sw_desc->group_head;
542 iop_desc_init_interrupt(grp_start, iop_chan);
543 grp_start->unmap_len = 0;
544 }
545 spin_unlock_bh(&iop_chan->lock);
546
547 return sw_desc ? &sw_desc->async_tx : NULL;
548}
549
550static void
551iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
552 int index)
553{
554 struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
555 struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
556
557 iop_desc_set_memcpy_src_addr(grp_start, addr);
558}
559
560static struct dma_async_tx_descriptor *
561iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
562{
563 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
564 struct iop_adma_desc_slot *sw_desc, *grp_start;
565 int slot_cnt, slots_per_op;
566
567 if (unlikely(!len))
568 return NULL;
569 BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
570
571 dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
572 __FUNCTION__, len);
573
574 spin_lock_bh(&iop_chan->lock);
575 slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
576 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
577 if (sw_desc) {
578 grp_start = sw_desc->group_head;
579 iop_desc_init_memcpy(grp_start, int_en);
580 iop_desc_set_byte_count(grp_start, iop_chan, len);
581 sw_desc->unmap_src_cnt = 1;
582 sw_desc->unmap_len = len;
583 sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src;
584 }
585 spin_unlock_bh(&iop_chan->lock);
586
587 return sw_desc ? &sw_desc->async_tx : NULL;
588}
589
590static struct dma_async_tx_descriptor *
591iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
592 int int_en)
593{
594 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
595 struct iop_adma_desc_slot *sw_desc, *grp_start;
596 int slot_cnt, slots_per_op;
597
598 if (unlikely(!len))
599 return NULL;
600 BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
601
602 dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
603 __FUNCTION__, len);
604
605 spin_lock_bh(&iop_chan->lock);
606 slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
607 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
608 if (sw_desc) {
609 grp_start = sw_desc->group_head;
610 iop_desc_init_memset(grp_start, int_en);
611 iop_desc_set_byte_count(grp_start, iop_chan, len);
612 iop_desc_set_block_fill_val(grp_start, value);
613 sw_desc->unmap_src_cnt = 1;
614 sw_desc->unmap_len = len;
615 }
616 spin_unlock_bh(&iop_chan->lock);
617
618 return sw_desc ? &sw_desc->async_tx : NULL;
619}
620
621static void
622iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
623 int index)
624{
625 struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
626 struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
627
628 iop_desc_set_xor_src_addr(grp_start, index, addr);
629}
630
631static struct dma_async_tx_descriptor *
632iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
633 int int_en)
634{
635 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
636 struct iop_adma_desc_slot *sw_desc, *grp_start;
637 int slot_cnt, slots_per_op;
638
639 if (unlikely(!len))
640 return NULL;
641 BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
642
643 dev_dbg(iop_chan->device->common.dev,
644 "%s src_cnt: %d len: %u int_en: %d\n",
645 __FUNCTION__, src_cnt, len, int_en);
646
647 spin_lock_bh(&iop_chan->lock);
648 slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
649 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
650 if (sw_desc) {
651 grp_start = sw_desc->group_head;
652 iop_desc_init_xor(grp_start, src_cnt, int_en);
653 iop_desc_set_byte_count(grp_start, iop_chan, len);
654 sw_desc->unmap_src_cnt = src_cnt;
655 sw_desc->unmap_len = len;
656 sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src;
657 }
658 spin_unlock_bh(&iop_chan->lock);
659
660 return sw_desc ? &sw_desc->async_tx : NULL;
661}
662
663static void
664iop_adma_xor_zero_sum_set_src(dma_addr_t addr,
665 struct dma_async_tx_descriptor *tx,
666 int index)
667{
668 struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
669 struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
670
671 iop_desc_set_zero_sum_src_addr(grp_start, index, addr);
672}
673
674static struct dma_async_tx_descriptor *
675iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
676 size_t len, u32 *result, int int_en)
677{
678 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
679 struct iop_adma_desc_slot *sw_desc, *grp_start;
680 int slot_cnt, slots_per_op;
681
682 if (unlikely(!len))
683 return NULL;
684
685 dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
686 __FUNCTION__, src_cnt, len);
687
688 spin_lock_bh(&iop_chan->lock);
689 slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
690 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
691 if (sw_desc) {
692 grp_start = sw_desc->group_head;
693 iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
694 iop_desc_set_zero_sum_byte_count(grp_start, len);
695 grp_start->xor_check_result = result;
696 pr_debug("\t%s: grp_start->xor_check_result: %p\n",
697 __FUNCTION__, grp_start->xor_check_result);
698 sw_desc->unmap_src_cnt = src_cnt;
699 sw_desc->unmap_len = len;
700 sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src;
701 }
702 spin_unlock_bh(&iop_chan->lock);
703
704 return sw_desc ? &sw_desc->async_tx : NULL;
705}
706
707static void iop_adma_dependency_added(struct dma_chan *chan)
708{
709 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
710 tasklet_schedule(&iop_chan->irq_tasklet);
711}
712
713static void iop_adma_free_chan_resources(struct dma_chan *chan)
714{
715 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
716 struct iop_adma_desc_slot *iter, *_iter;
717 int in_use_descs = 0;
718
719 iop_adma_slot_cleanup(iop_chan);
720
721 spin_lock_bh(&iop_chan->lock);
722 list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
723 chain_node) {
724 in_use_descs++;
725 list_del(&iter->chain_node);
726 }
727 list_for_each_entry_safe_reverse(
728 iter, _iter, &iop_chan->all_slots, slot_node) {
729 list_del(&iter->slot_node);
730 kfree(iter);
731 iop_chan->slots_allocated--;
732 }
733 iop_chan->last_used = NULL;
734
735 dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
736 __FUNCTION__, iop_chan->slots_allocated);
737 spin_unlock_bh(&iop_chan->lock);
738
739 /* one is ok since we left it on there on purpose */
740 if (in_use_descs > 1)
741 printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
742 in_use_descs - 1);
743}
744
745/**
746 * iop_adma_is_complete - poll the status of an ADMA transaction
747 * @chan: ADMA channel handle
748 * @cookie: ADMA transaction identifier
749 */
750static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
751 dma_cookie_t cookie,
752 dma_cookie_t *done,
753 dma_cookie_t *used)
754{
755 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
756 dma_cookie_t last_used;
757 dma_cookie_t last_complete;
758 enum dma_status ret;
759
760 last_used = chan->cookie;
761 last_complete = iop_chan->completed_cookie;
762
763 if (done)
764 *done = last_complete;
765 if (used)
766 *used = last_used;
767
768 ret = dma_async_is_complete(cookie, last_complete, last_used);
769 if (ret == DMA_SUCCESS)
770 return ret;
771
772 iop_adma_slot_cleanup(iop_chan);
773
774 last_used = chan->cookie;
775 last_complete = iop_chan->completed_cookie;
776
777 if (done)
778 *done = last_complete;
779 if (used)
780 *used = last_used;
781
782 return dma_async_is_complete(cookie, last_complete, last_used);
783}
784
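iop_adma_is_complete() is wired up as device_is_tx_complete in the probe routine further down, so a client can poll a cookie the same way the self-tests in this file do. A hedged sketch with a hypothetical helper name; a real consumer would normally prefer the completion callback over busy-waiting.

#include <linux/dmaengine.h>
#include <linux/delay.h>

/* sketch only: poll a cookie until the channel reports a final status */
static int sample_poll_cookie(struct dma_chan *chan, dma_cookie_t cookie)
{
        enum dma_status status;

        do {
                status = chan->device->device_is_tx_complete(chan, cookie,
                                                             NULL, NULL);
                if (status == DMA_IN_PROGRESS)
                        msleep(1);      /* sleeps, so process context only */
        } while (status == DMA_IN_PROGRESS);

        return status == DMA_SUCCESS ? 0 : -EIO;
}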
785static irqreturn_t iop_adma_eot_handler(int irq, void *data)
786{
787 struct iop_adma_chan *chan = data;
788
789 dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
790
791 tasklet_schedule(&chan->irq_tasklet);
792
793 iop_adma_device_clear_eot_status(chan);
794
795 return IRQ_HANDLED;
796}
797
798static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
799{
800 struct iop_adma_chan *chan = data;
801
802 dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
803
804 tasklet_schedule(&chan->irq_tasklet);
805
806 iop_adma_device_clear_eoc_status(chan);
807
808 return IRQ_HANDLED;
809}
810
811static irqreturn_t iop_adma_err_handler(int irq, void *data)
812{
813 struct iop_adma_chan *chan = data;
814 unsigned long status = iop_chan_get_status(chan);
815
816 dev_printk(KERN_ERR, chan->device->common.dev,
817 "error ( %s%s%s%s%s%s%s)\n",
818 iop_is_err_int_parity(status, chan) ? "int_parity " : "",
819 iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
820 iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
821 iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
822 iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
823 iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
824 iop_is_err_split_tx(status, chan) ? "split_tx " : "");
825
826 iop_adma_device_clear_err_status(chan);
827
828 BUG();
829
830 return IRQ_HANDLED;
831}
832
833static void iop_adma_issue_pending(struct dma_chan *chan)
834{
835 struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
836
837 if (iop_chan->pending) {
838 iop_chan->pending = 0;
839 iop_chan_append(iop_chan);
840 }
841}
842
843/*
844 * Perform a transaction to verify the HW works.
845 */
846#define IOP_ADMA_TEST_SIZE 2000
847
848static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
849{
850 int i;
851 void *src, *dest;
852 dma_addr_t src_dma, dest_dma;
853 struct dma_chan *dma_chan;
854 dma_cookie_t cookie;
855 struct dma_async_tx_descriptor *tx;
856 int err = 0;
857 struct iop_adma_chan *iop_chan;
858
859 dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
860
861 src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
862 if (!src)
863 return -ENOMEM;
864 dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
865 if (!dest) {
866 kfree(src);
867 return -ENOMEM;
868 }
869
870 /* Fill in src buffer */
871 for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
872 ((u8 *) src)[i] = (u8)i;
873
874 memset(dest, 0, IOP_ADMA_TEST_SIZE);
875
876 /* Start copy, using first DMA channel */
877 dma_chan = container_of(device->common.channels.next,
878 struct dma_chan,
879 device_node);
880 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
881 err = -ENODEV;
882 goto out;
883 }
884
885 tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1);
886 dest_dma = dma_map_single(dma_chan->device->dev, dest,
887 IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
888 iop_adma_set_dest(dest_dma, tx, 0);
889 src_dma = dma_map_single(dma_chan->device->dev, src,
890 IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
891 iop_adma_memcpy_set_src(src_dma, tx, 0);
892
893 cookie = iop_adma_tx_submit(tx);
894 iop_adma_issue_pending(dma_chan);
895 async_tx_ack(tx);
896 msleep(1);
897
898 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
899 DMA_SUCCESS) {
900 dev_printk(KERN_ERR, dma_chan->device->dev,
901 "Self-test copy timed out, disabling\n");
902 err = -ENODEV;
903 goto free_resources;
904 }
905
906 iop_chan = to_iop_adma_chan(dma_chan);
907 dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
908 IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
909 if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
910 dev_printk(KERN_ERR, dma_chan->device->dev,
911 "Self-test copy failed compare, disabling\n");
912 err = -ENODEV;
913 goto free_resources;
914 }
915
916free_resources:
917 iop_adma_free_chan_resources(dma_chan);
918out:
919 kfree(src);
920 kfree(dest);
921 return err;
922}
923
924#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
925static int __devinit
926iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
927{
928 int i, src_idx;
929 struct page *dest;
930 struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
931 struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
932 dma_addr_t dma_addr, dest_dma;
933 struct dma_async_tx_descriptor *tx;
934 struct dma_chan *dma_chan;
935 dma_cookie_t cookie;
936 u8 cmp_byte = 0;
937 u32 cmp_word;
938 u32 zero_sum_result;
939 int err = 0;
940 struct iop_adma_chan *iop_chan;
941
942 dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
943
944 for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
945 xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
946 if (!xor_srcs[src_idx]) {
947 while (src_idx--)
948 __free_page(xor_srcs[src_idx]);
949 return -ENOMEM;
950 }
951 }
952
953 dest = alloc_page(GFP_KERNEL);
954 if (!dest) {
955 while (src_idx--)
956 __free_page(xor_srcs[src_idx]);
957 return -ENOMEM;
958 }
959
960 /* Fill in src buffers */
961 for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
962 u8 *ptr = page_address(xor_srcs[src_idx]);
963 for (i = 0; i < PAGE_SIZE; i++)
964 ptr[i] = (1 << src_idx);
965 }
966
967 for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
968 cmp_byte ^= (u8) (1 << src_idx);
969
970 cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
971 (cmp_byte << 8) | cmp_byte;
972
973 memset(page_address(dest), 0, PAGE_SIZE);
974
975 dma_chan = container_of(device->common.channels.next,
976 struct dma_chan,
977 device_node);
978 if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
979 err = -ENODEV;
980 goto out;
981 }
982
983 /* test xor */
984 tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST,
985 PAGE_SIZE, 1);
986 dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
987 PAGE_SIZE, DMA_FROM_DEVICE);
988 iop_adma_set_dest(dest_dma, tx, 0);
989
990 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
991 dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0,
992 PAGE_SIZE, DMA_TO_DEVICE);
993 iop_adma_xor_set_src(dma_addr, tx, i);
994 }
995
996 cookie = iop_adma_tx_submit(tx);
997 iop_adma_issue_pending(dma_chan);
998 async_tx_ack(tx);
999 msleep(8);
1000
1001 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
1002 DMA_SUCCESS) {
1003 dev_printk(KERN_ERR, dma_chan->device->dev,
1004 "Self-test xor timed out, disabling\n");
1005 err = -ENODEV;
1006 goto free_resources;
1007 }
1008
1009 iop_chan = to_iop_adma_chan(dma_chan);
1010 dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
1011 PAGE_SIZE, DMA_FROM_DEVICE);
1012 for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
1013 u32 *ptr = page_address(dest);
1014 if (ptr[i] != cmp_word) {
1015 dev_printk(KERN_ERR, dma_chan->device->dev,
1016 "Self-test xor failed compare, disabling\n");
1017 err = -ENODEV;
1018 goto free_resources;
1019 }
1020 }
1021 dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
1022 PAGE_SIZE, DMA_TO_DEVICE);
1023
1024 /* skip zero sum if the capability is not present */
1025 if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
1026 goto free_resources;
1027
1028 /* zero sum the sources with the destination page */
1029 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
1030 zero_sum_srcs[i] = xor_srcs[i];
1031 zero_sum_srcs[i] = dest;
1032
1033 zero_sum_result = 1;
1034
1035 tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
1036 PAGE_SIZE, &zero_sum_result, 1);
1037 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
1038 dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
1039 0, PAGE_SIZE, DMA_TO_DEVICE);
1040 iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
1041 }
1042
1043 cookie = iop_adma_tx_submit(tx);
1044 iop_adma_issue_pending(dma_chan);
1045 async_tx_ack(tx);
1046 msleep(8);
1047
1048 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1049 dev_printk(KERN_ERR, dma_chan->device->dev,
1050 "Self-test zero sum timed out, disabling\n");
1051 err = -ENODEV;
1052 goto free_resources;
1053 }
1054
1055 if (zero_sum_result != 0) {
1056 dev_printk(KERN_ERR, dma_chan->device->dev,
1057 "Self-test zero sum failed compare, disabling\n");
1058 err = -ENODEV;
1059 goto free_resources;
1060 }
1061
1062 /* test memset */
1063 tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1);
1064 dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
1065 PAGE_SIZE, DMA_FROM_DEVICE);
1066 iop_adma_set_dest(dma_addr, tx, 0);
1067
1068 cookie = iop_adma_tx_submit(tx);
1069 iop_adma_issue_pending(dma_chan);
1070 async_tx_ack(tx);
1071 msleep(8);
1072
1073 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1074 dev_printk(KERN_ERR, dma_chan->device->dev,
1075 "Self-test memset timed out, disabling\n");
1076 err = -ENODEV;
1077 goto free_resources;
1078 }
1079
1080 for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
1081 u32 *ptr = page_address(dest);
1082 if (ptr[i]) {
1083 dev_printk(KERN_ERR, dma_chan->device->dev,
1084 "Self-test memset failed compare, disabling\n");
1085 err = -ENODEV;
1086 goto free_resources;
1087 }
1088 }
1089
1090 /* test for non-zero parity sum */
1091 zero_sum_result = 0;
1092 tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
1093 PAGE_SIZE, &zero_sum_result, 1);
1094 for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
1095 dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
1096 0, PAGE_SIZE, DMA_TO_DEVICE);
1097 iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
1098 }
1099
1100 cookie = iop_adma_tx_submit(tx);
1101 iop_adma_issue_pending(dma_chan);
1102 async_tx_ack(tx);
1103 msleep(8);
1104
1105 if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
1106 dev_printk(KERN_ERR, dma_chan->device->dev,
1107 "Self-test non-zero sum timed out, disabling\n");
1108 err = -ENODEV;
1109 goto free_resources;
1110 }
1111
1112 if (zero_sum_result != 1) {
1113 dev_printk(KERN_ERR, dma_chan->device->dev,
1114 "Self-test non-zero sum failed compare, disabling\n");
1115 err = -ENODEV;
1116 goto free_resources;
1117 }
1118
1119free_resources:
1120 iop_adma_free_chan_resources(dma_chan);
1121out:
1122 src_idx = IOP_ADMA_NUM_SRC_TEST;
1123 while (src_idx--)
1124 __free_page(xor_srcs[src_idx]);
1125 __free_page(dest);
1126 return err;
1127}
1128
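The xor leg of the self-test above also documents the multi-source prep convention: device_prep_dma_xor takes only src_cnt and len, and each source is attached afterwards, by index, through tx_set_src. A condensed, hedged sketch of that sequence (hypothetical name, error handling omitted):

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/async_tx.h>

/* sketch only: issue one xor of src_cnt pages into 'dest' */
static dma_cookie_t sample_issue_xor(struct dma_chan *chan, struct page *dest,
                                     struct page **srcs, int src_cnt, size_t len)
{
        struct dma_async_tx_descriptor *tx;
        dma_cookie_t cookie;
        dma_addr_t addr;
        int i;

        tx = chan->device->device_prep_dma_xor(chan, src_cnt, len, 1);

        addr = dma_map_page(chan->device->dev, dest, 0, len, DMA_FROM_DEVICE);
        tx->tx_set_dest(addr, tx, 0);

        for (i = 0; i < src_cnt; i++) {
                addr = dma_map_page(chan->device->dev, srcs[i], 0, len,
                                    DMA_TO_DEVICE);
                tx->tx_set_src(addr, tx, i);    /* one call per source index */
        }

        cookie = tx->tx_submit(tx);
        chan->device->device_issue_pending(chan);
        async_tx_ack(tx);

        return cookie;
}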
1129static int __devexit iop_adma_remove(struct platform_device *dev)
1130{
1131 struct iop_adma_device *device = platform_get_drvdata(dev);
1132 struct dma_chan *chan, *_chan;
1133 struct iop_adma_chan *iop_chan;
1134 int i;
1135 struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
1136
1137 dma_async_device_unregister(&device->common);
1138
1139 for (i = 0; i < 3; i++) {
1140 unsigned int irq;
1141 irq = platform_get_irq(dev, i);
1142 free_irq(irq, device);
1143 }
1144
1145 dma_free_coherent(&dev->dev, plat_data->pool_size,
1146 device->dma_desc_pool_virt, device->dma_desc_pool);
1147
1148 do {
1149 struct resource *res;
1150 res = platform_get_resource(dev, IORESOURCE_MEM, 0);
1151 release_mem_region(res->start, res->end - res->start);
1152 } while (0);
1153
1154 list_for_each_entry_safe(chan, _chan, &device->common.channels,
1155 device_node) {
1156 iop_chan = to_iop_adma_chan(chan);
1157 list_del(&chan->device_node);
1158 kfree(iop_chan);
1159 }
1160 kfree(device);
1161
1162 return 0;
1163}
1164
1165static int __devinit iop_adma_probe(struct platform_device *pdev)
1166{
1167 struct resource *res;
1168 int ret = 0, i;
1169 struct iop_adma_device *adev;
1170 struct iop_adma_chan *iop_chan;
1171 struct dma_device *dma_dev;
1172 struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
1173
1174 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1175 if (!res)
1176 return -ENODEV;
1177
1178 if (!devm_request_mem_region(&pdev->dev, res->start,
1179 res->end - res->start, pdev->name))
1180 return -EBUSY;
1181
1182 adev = kzalloc(sizeof(*adev), GFP_KERNEL);
1183 if (!adev)
1184 return -ENOMEM;
1185 dma_dev = &adev->common;
1186
1187 /* allocate coherent memory for hardware descriptors
1188 * note: writecombine gives slightly better performance, but
1189 * requires that we explicitly flush the writes
1190 */
1191 if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
1192 plat_data->pool_size,
1193 &adev->dma_desc_pool,
1194 GFP_KERNEL)) == NULL) {
1195 ret = -ENOMEM;
1196 goto err_free_adev;
1197 }
1198
1199 dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n",
1200 __FUNCTION__, adev->dma_desc_pool_virt,
1201 (void *) adev->dma_desc_pool);
1202
1203 adev->id = plat_data->hw_id;
1204
1205 /* discover transaction capabilities from the platform data */
1206 dma_dev->cap_mask = plat_data->cap_mask;
1207
1208 adev->pdev = pdev;
1209 platform_set_drvdata(pdev, adev);
1210
1211 INIT_LIST_HEAD(&dma_dev->channels);
1212
1213 /* set base routines */
1214 dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
1215 dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
1216 dma_dev->device_is_tx_complete = iop_adma_is_complete;
1217 dma_dev->device_issue_pending = iop_adma_issue_pending;
1218 dma_dev->device_dependency_added = iop_adma_dependency_added;
1219 dma_dev->dev = &pdev->dev;
1220
1221 /* set prep routines based on capability */
1222 if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
1223 dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
1224 if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
1225 dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
1226 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
1227 dma_dev->max_xor = iop_adma_get_max_xor();
1228 dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
1229 }
1230 if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
1231 dma_dev->device_prep_dma_zero_sum =
1232 iop_adma_prep_dma_zero_sum;
1233 if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
1234 dma_dev->device_prep_dma_interrupt =
1235 iop_adma_prep_dma_interrupt;
1236
1237 iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
1238 if (!iop_chan) {
1239 ret = -ENOMEM;
1240 goto err_free_dma;
1241 }
1242 iop_chan->device = adev;
1243
1244 iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
1245 res->end - res->start);
1246 if (!iop_chan->mmr_base) {
1247 ret = -ENOMEM;
1248 goto err_free_iop_chan;
1249 }
1250 tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
1251 iop_chan);
1252
1253 /* clear errors before enabling interrupts */
1254 iop_adma_device_clear_err_status(iop_chan);
1255
1256 for (i = 0; i < 3; i++) {
1257 irq_handler_t handler[] = { iop_adma_eot_handler,
1258 iop_adma_eoc_handler,
1259 iop_adma_err_handler };
1260 int irq = platform_get_irq(pdev, i);
1261 if (irq < 0) {
1262 ret = -ENXIO;
1263 goto err_free_iop_chan;
1264 } else {
1265 ret = devm_request_irq(&pdev->dev, irq,
1266 handler[i], 0, pdev->name, iop_chan);
1267 if (ret)
1268 goto err_free_iop_chan;
1269 }
1270 }
1271
1272 spin_lock_init(&iop_chan->lock);
1273 init_timer(&iop_chan->cleanup_watchdog);
1274 iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan;
1275 iop_chan->cleanup_watchdog.function = iop_adma_tasklet;
1276 INIT_LIST_HEAD(&iop_chan->chain);
1277 INIT_LIST_HEAD(&iop_chan->all_slots);
1278 INIT_RCU_HEAD(&iop_chan->common.rcu);
1279 iop_chan->common.device = dma_dev;
1280 list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
1281
1282 if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
1283 ret = iop_adma_memcpy_self_test(adev);
1284 dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
1285 if (ret)
1286 goto err_free_iop_chan;
1287 }
1288
1289 if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
1290 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
1291 ret = iop_adma_xor_zero_sum_self_test(adev);
1292 dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
1293 if (ret)
1294 goto err_free_iop_chan;
1295 }
1296
1297 dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
1298 "( %s%s%s%s%s%s%s%s%s%s)\n",
1299 dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
1300 dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
1301 dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
1302 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
1303 dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
1304 dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
1305 dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
1306 dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
1307 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
1308 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
1309
1310 dma_async_device_register(dma_dev);
1311 goto out;
1312
1313 err_free_iop_chan:
1314 kfree(iop_chan);
1315 err_free_dma:
1316 dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
1317 adev->dma_desc_pool_virt, adev->dma_desc_pool);
1318 err_free_adev:
1319 kfree(adev);
1320 out:
1321 return ret;
1322}
1323
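The probe above relies on three pieces of board-supplied platform data (hw_id, cap_mask and pool_size), which the arch/arm/plat-iop and mach-iop* changes in this merge provide for each DMA/AAU unit. A hedged sketch of that board-side contract; the field names are taken from the dereferences in the probe, the values are illustrative only, and the struct layout is assumed.

#include <linux/dmaengine.h>
#include <asm/page.h>
#include <asm/hardware/iop_adma.h>

/* sketch only: illustrative board-side platform data, not from this patch */
static struct iop_adma_platform_data sample_adma_pdata = {
        .hw_id          = 0,            /* which DMA/AAU unit this describes */
        .pool_size      = PAGE_SIZE,    /* coherent descriptor pool size */
};

static void sample_declare_adma_caps(void)
{
        /* capabilities the probe turns into prep routines */
        dma_cap_set(DMA_MEMCPY, sample_adma_pdata.cap_mask);
        dma_cap_set(DMA_XOR, sample_adma_pdata.cap_mask);
        dma_cap_set(DMA_INTERRUPT, sample_adma_pdata.cap_mask);
}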
1324static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
1325{
1326 struct iop_adma_desc_slot *sw_desc, *grp_start;
1327 dma_cookie_t cookie;
1328 int slot_cnt, slots_per_op;
1329
1330 dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
1331
1332 spin_lock_bh(&iop_chan->lock);
1333 slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
1334 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
1335 if (sw_desc) {
1336 grp_start = sw_desc->group_head;
1337
1338 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
1339 sw_desc->async_tx.ack = 1;
1340 iop_desc_init_memcpy(grp_start, 0);
1341 iop_desc_set_byte_count(grp_start, iop_chan, 0);
1342 iop_desc_set_dest_addr(grp_start, iop_chan, 0);
1343 iop_desc_set_memcpy_src_addr(grp_start, 0);
1344
1345 cookie = iop_chan->common.cookie;
1346 cookie++;
1347 if (cookie <= 1)
1348 cookie = 2;
1349
1350 /* initialize the completed cookie to be less than
1351 * the most recently used cookie
1352 */
1353 iop_chan->completed_cookie = cookie - 1;
1354 iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
1355
1356 /* channel should not be busy */
1357 BUG_ON(iop_chan_is_busy(iop_chan));
1358
1359 /* clear any prior error-status bits */
1360 iop_adma_device_clear_err_status(iop_chan);
1361
1362 /* disable operation */
1363 iop_chan_disable(iop_chan);
1364
1365 /* set the descriptor address */
1366 iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
1367
1368 /* 1/ don't add pre-chained descriptors
1369 * 2/ dummy read to flush next_desc write
1370 */
1371 BUG_ON(iop_desc_get_next_desc(sw_desc));
1372
1373 /* run the descriptor */
1374 iop_chan_enable(iop_chan);
1375 } else
1376 dev_printk(KERN_ERR, iop_chan->device->common.dev,
1377 "failed to allocate null descriptor\n");
1378 spin_unlock_bh(&iop_chan->lock);
1379}
1380
1381static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
1382{
1383 struct iop_adma_desc_slot *sw_desc, *grp_start;
1384 dma_cookie_t cookie;
1385 int slot_cnt, slots_per_op;
1386
1387 dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
1388
1389 spin_lock_bh(&iop_chan->lock);
1390 slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
1391 sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
1392 if (sw_desc) {
1393 grp_start = sw_desc->group_head;
1394 list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
1395 sw_desc->async_tx.ack = 1;
1396 iop_desc_init_null_xor(grp_start, 2, 0);
1397 iop_desc_set_byte_count(grp_start, iop_chan, 0);
1398 iop_desc_set_dest_addr(grp_start, iop_chan, 0);
1399 iop_desc_set_xor_src_addr(grp_start, 0, 0);
1400 iop_desc_set_xor_src_addr(grp_start, 1, 0);
1401
1402 cookie = iop_chan->common.cookie;
1403 cookie++;
1404 if (cookie <= 1)
1405 cookie = 2;
1406
1407 /* initialize the completed cookie to be less than
1408 * the most recently used cookie
1409 */
1410 iop_chan->completed_cookie = cookie - 1;
1411 iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
1412
1413 /* channel should not be busy */
1414 BUG_ON(iop_chan_is_busy(iop_chan));
1415
1416 /* clear any prior error-status bits */
1417 iop_adma_device_clear_err_status(iop_chan);
1418
1419 /* disable operation */
1420 iop_chan_disable(iop_chan);
1421
1422 /* set the descriptor address */
1423 iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
1424
1425 /* 1/ don't add pre-chained descriptors
1426 * 2/ dummy read to flush next_desc write
1427 */
1428 BUG_ON(iop_desc_get_next_desc(sw_desc));
1429
1430 /* run the descriptor */
1431 iop_chan_enable(iop_chan);
1432 } else
1433 dev_printk(KERN_ERR, iop_chan->device->common.dev,
1434 "failed to allocate null descriptor\n");
1435 spin_unlock_bh(&iop_chan->lock);
1436}
1437
1438static struct platform_driver iop_adma_driver = {
1439 .probe = iop_adma_probe,
1440 .remove = iop_adma_remove,
1441 .driver = {
1442 .owner = THIS_MODULE,
1443 .name = "iop-adma",
1444 },
1445};
1446
1447static int __init iop_adma_init (void)
1448{
1449 /* it's currently unsafe to unload this module */
1450 /* if forced, worst case is that rmmod hangs */
1451 __unsafe(THIS_MODULE);
1452
1453 return platform_driver_register(&iop_adma_driver);
1454}
1455
1456static void __exit iop_adma_exit (void)
1457{
1458 platform_driver_unregister(&iop_adma_driver);
1459 return;
1460}
1461
1462module_init(iop_adma_init);
1463module_exit(iop_adma_exit);
1464
1465MODULE_AUTHOR("Intel Corporation");
1466MODULE_DESCRIPTION("IOP ADMA Engine Driver");
1467MODULE_LICENSE("GPL");
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 466909f38d98..64bf3a81db93 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -109,6 +109,8 @@ config MD_RAID10
109config MD_RAID456 109config MD_RAID456
110 tristate "RAID-4/RAID-5/RAID-6 mode" 110 tristate "RAID-4/RAID-5/RAID-6 mode"
111 depends on BLK_DEV_MD 111 depends on BLK_DEV_MD
112 select ASYNC_MEMCPY
113 select ASYNC_XOR
112 ---help--- 114 ---help---
113 A RAID-5 set of N drives with a capacity of C MB per drive provides 115 A RAID-5 set of N drives with a capacity of C MB per drive provides
114 the capacity of C * (N - 1) MB, and protects against a failure 116 the capacity of C * (N - 1) MB, and protects against a failure
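MD_RAID456 now selects ASYNC_MEMCPY and ASYNC_XOR because the raid5.c rework further down routes copies and parity generation through the async_tx API, which uses an offload channel when one is registered and otherwise falls back to a synchronous software path. A hedged sketch of chaining the two operations, assuming the async_memcpy()/async_xor() prototypes introduced by this merge; page and count arguments are placeholders.

#include <linux/async_tx.h>

/* sketch only: copy new data into place, then regenerate parity */
static struct dma_async_tx_descriptor *
sample_copy_then_xor(struct page *dest, struct page *bio_page,
                     struct page **srcs, int src_cnt, size_t len)
{
        struct dma_async_tx_descriptor *tx;

        /* stage 1: pull the new data into place */
        tx = async_memcpy(dest, bio_page, 0, 0, len, 0, NULL, NULL, NULL);

        /* stage 2: xor the sources into a zeroed destination once the
         * copy completes; ordering is expressed by passing the first
         * descriptor as the dependency, which DEP_ACK releases */
        tx = async_xor(dest, srcs, 0, src_cnt, len,
                       ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK | ASYNC_TX_DEP_ACK,
                       tx, NULL, NULL);

        return tx;
}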
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 2c45d7683ae9..c49366cdc05d 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -18,7 +18,7 @@ raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
18hostprogs-y := mktables 18hostprogs-y := mktables
19 19
20# Note: link order is important. All raid personalities 20# Note: link order is important. All raid personalities
21# and xor.o must come before md.o, as they each initialise 21# and must come before md.o, as they each initialise
22# themselves, and md.o may use the personalities when it 22# themselves, and md.o may use the personalities when it
23# auto-initialised. 23# auto-initialised.
24 24
@@ -26,7 +26,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
26obj-$(CONFIG_MD_RAID0) += raid0.o 26obj-$(CONFIG_MD_RAID0) += raid0.o
27obj-$(CONFIG_MD_RAID1) += raid1.o 27obj-$(CONFIG_MD_RAID1) += raid1.o
28obj-$(CONFIG_MD_RAID10) += raid10.o 28obj-$(CONFIG_MD_RAID10) += raid10.o
29obj-$(CONFIG_MD_RAID456) += raid456.o xor.o 29obj-$(CONFIG_MD_RAID456) += raid456.o
30obj-$(CONFIG_MD_MULTIPATH) += multipath.o 30obj-$(CONFIG_MD_MULTIPATH) += multipath.o
31obj-$(CONFIG_MD_FAULTY) += faulty.o 31obj-$(CONFIG_MD_FAULTY) += faulty.o
32obj-$(CONFIG_BLK_DEV_MD) += md-mod.o 32obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1c54f3c1cca7..33beaa7da085 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5814,7 +5814,7 @@ static __exit void md_exit(void)
5814 } 5814 }
5815} 5815}
5816 5816
5817module_init(md_init) 5817subsys_initcall(md_init);
5818module_exit(md_exit) 5818module_exit(md_exit)
5819 5819
5820static int get_ro(char *buffer, struct kernel_param *kp) 5820static int get_ro(char *buffer, struct kernel_param *kp)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 061375ee6592..0b66afef2d82 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -52,6 +52,7 @@
52#include "raid6.h" 52#include "raid6.h"
53 53
54#include <linux/raid/bitmap.h> 54#include <linux/raid/bitmap.h>
55#include <linux/async_tx.h>
55 56
56/* 57/*
57 * Stripe cache 58 * Stripe cache
@@ -80,7 +81,6 @@
80/* 81/*
81 * The following can be used to debug the driver 82 * The following can be used to debug the driver
82 */ 83 */
83#define RAID5_DEBUG 0
84#define RAID5_PARANOIA 1 84#define RAID5_PARANOIA 1
85#if RAID5_PARANOIA && defined(CONFIG_SMP) 85#if RAID5_PARANOIA && defined(CONFIG_SMP)
86# define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock) 86# define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock)
@@ -88,8 +88,7 @@
88# define CHECK_DEVLOCK() 88# define CHECK_DEVLOCK()
89#endif 89#endif
90 90
91#define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x))) 91#ifdef DEBUG
92#if RAID5_DEBUG
93#define inline 92#define inline
94#define __inline__ 93#define __inline__
95#endif 94#endif
@@ -104,6 +103,23 @@ static inline int raid6_next_disk(int disk, int raid_disks)
104 disk++; 103 disk++;
105 return (disk < raid_disks) ? disk : 0; 104 return (disk < raid_disks) ? disk : 0;
106} 105}
106
107static void return_io(struct bio *return_bi)
108{
109 struct bio *bi = return_bi;
110 while (bi) {
111 int bytes = bi->bi_size;
112
113 return_bi = bi->bi_next;
114 bi->bi_next = NULL;
115 bi->bi_size = 0;
116 bi->bi_end_io(bi, bytes,
117 test_bit(BIO_UPTODATE, &bi->bi_flags)
118 ? 0 : -EIO);
119 bi = return_bi;
120 }
121}
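
The return_io() helper above walks a singly linked bi_next chain, detaching each bio before calling its end_io handler so the completion path never sees a stale next pointer. A user-space sketch of the same walk follows; the fake_bio type and helper names are invented for the illustration and are not part of the patch:

#include <stdio.h>
#include <stdlib.h>

struct fake_bio {
        int id;
        struct fake_bio *next;
};

static void complete_bio(struct fake_bio *b)
{
        /* by the time we get here the bio is already off the list */
        printf("completing bio %d (next=%p)\n", b->id, (void *)b->next);
        free(b);
}

int main(void)
{
        struct fake_bio *head = NULL;

        /* build a three-entry chain: 2 -> 1 -> 0 */
        for (int i = 0; i < 3; i++) {
                struct fake_bio *b = malloc(sizeof(*b));
                b->id = i;
                b->next = head;
                head = b;
        }

        /* same shape as return_io(): save next, detach, then complete */
        while (head) {
                struct fake_bio *b = head;
                head = b->next;
                b->next = NULL;
                complete_bio(b);
        }
        return 0;
}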
122
107static void print_raid5_conf (raid5_conf_t *conf); 123static void print_raid5_conf (raid5_conf_t *conf);
108 124
109static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) 125static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
@@ -125,6 +141,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
125 } 141 }
126 md_wakeup_thread(conf->mddev->thread); 142 md_wakeup_thread(conf->mddev->thread);
127 } else { 143 } else {
144 BUG_ON(sh->ops.pending);
128 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 145 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
129 atomic_dec(&conf->preread_active_stripes); 146 atomic_dec(&conf->preread_active_stripes);
130 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) 147 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -152,7 +169,8 @@ static void release_stripe(struct stripe_head *sh)
152 169
153static inline void remove_hash(struct stripe_head *sh) 170static inline void remove_hash(struct stripe_head *sh)
154{ 171{
155 PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); 172 pr_debug("remove_hash(), stripe %llu\n",
173 (unsigned long long)sh->sector);
156 174
157 hlist_del_init(&sh->hash); 175 hlist_del_init(&sh->hash);
158} 176}
@@ -161,7 +179,8 @@ static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
161{ 179{
162 struct hlist_head *hp = stripe_hash(conf, sh->sector); 180 struct hlist_head *hp = stripe_hash(conf, sh->sector);
163 181
164 PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); 182 pr_debug("insert_hash(), stripe %llu\n",
183 (unsigned long long)sh->sector);
165 184
166 CHECK_DEVLOCK(); 185 CHECK_DEVLOCK();
167 hlist_add_head(&sh->hash, hp); 186 hlist_add_head(&sh->hash, hp);
@@ -224,9 +243,10 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
224 243
225 BUG_ON(atomic_read(&sh->count) != 0); 244 BUG_ON(atomic_read(&sh->count) != 0);
226 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 245 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
227 246 BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
247
228 CHECK_DEVLOCK(); 248 CHECK_DEVLOCK();
229 PRINTK("init_stripe called, stripe %llu\n", 249 pr_debug("init_stripe called, stripe %llu\n",
230 (unsigned long long)sh->sector); 250 (unsigned long long)sh->sector);
231 251
232 remove_hash(sh); 252 remove_hash(sh);
@@ -240,11 +260,11 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
240 for (i = sh->disks; i--; ) { 260 for (i = sh->disks; i--; ) {
241 struct r5dev *dev = &sh->dev[i]; 261 struct r5dev *dev = &sh->dev[i];
242 262
243 if (dev->toread || dev->towrite || dev->written || 263 if (dev->toread || dev->read || dev->towrite || dev->written ||
244 test_bit(R5_LOCKED, &dev->flags)) { 264 test_bit(R5_LOCKED, &dev->flags)) {
245 printk("sector=%llx i=%d %p %p %p %d\n", 265 printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
246 (unsigned long long)sh->sector, i, dev->toread, 266 (unsigned long long)sh->sector, i, dev->toread,
247 dev->towrite, dev->written, 267 dev->read, dev->towrite, dev->written,
248 test_bit(R5_LOCKED, &dev->flags)); 268 test_bit(R5_LOCKED, &dev->flags));
249 BUG(); 269 BUG();
250 } 270 }
@@ -260,11 +280,11 @@ static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector, in
260 struct hlist_node *hn; 280 struct hlist_node *hn;
261 281
262 CHECK_DEVLOCK(); 282 CHECK_DEVLOCK();
263 PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); 283 pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
264 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) 284 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
265 if (sh->sector == sector && sh->disks == disks) 285 if (sh->sector == sector && sh->disks == disks)
266 return sh; 286 return sh;
267 PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); 287 pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
268 return NULL; 288 return NULL;
269} 289}
270 290
@@ -276,7 +296,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
276{ 296{
277 struct stripe_head *sh; 297 struct stripe_head *sh;
278 298
279 PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector); 299 pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
280 300
281 spin_lock_irq(&conf->device_lock); 301 spin_lock_irq(&conf->device_lock);
282 302
@@ -324,6 +344,579 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
324 return sh; 344 return sh;
325} 345}
326 346
347/* test_and_ack_op() ensures that we only dequeue an operation once */
348#define test_and_ack_op(op, pend) \
349do { \
350 if (test_bit(op, &sh->ops.pending) && \
351 !test_bit(op, &sh->ops.complete)) { \
352 if (test_and_set_bit(op, &sh->ops.ack)) \
353 clear_bit(op, &pend); \
354 else \
355 ack++; \
356 } else \
357 clear_bit(op, &pend); \
358} while (0)
359
360/* find new work to run, do not resubmit work that is already
361 * in flight
362 */
363static unsigned long get_stripe_work(struct stripe_head *sh)
364{
365 unsigned long pending;
366 int ack = 0;
367
368 pending = sh->ops.pending;
369
370 test_and_ack_op(STRIPE_OP_BIOFILL, pending);
371 test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
372 test_and_ack_op(STRIPE_OP_PREXOR, pending);
373 test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
374 test_and_ack_op(STRIPE_OP_POSTXOR, pending);
375 test_and_ack_op(STRIPE_OP_CHECK, pending);
376 if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending))
377 ack++;
378
379 sh->ops.count -= ack;
380 BUG_ON(sh->ops.count < 0);
381
382 return pending;
383}
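
get_stripe_work() snapshots the pending mask and moves each newly requested operation into the "ack" state so it is dispatched exactly once, even if handle_stripe5() is re-entered while the operation is still in flight. A minimal user-space sketch of that pending/ack/complete handshake; the ops_state struct and OP_* bits are invented for the example and are not the driver's types:

#include <stdio.h>

/* illustrative operation bits, not the kernel's STRIPE_OP_* values */
enum { OP_BIOFILL = 1 << 0, OP_COMPUTE = 1 << 1, OP_POSTXOR = 1 << 2 };

struct ops_state {
        unsigned long pending;   /* requested by the state machine   */
        unsigned long ack;       /* already handed to the dispatcher */
        unsigned long complete;  /* finished by the callback         */
};

/* return the subset of 'pending' that has not been acked or completed */
static unsigned long take_work(struct ops_state *s)
{
        unsigned long todo = s->pending & ~s->ack & ~s->complete;

        s->ack |= todo;          /* mark as dispatched exactly once */
        return todo;
}

int main(void)
{
        struct ops_state s = { .pending = OP_COMPUTE | OP_POSTXOR };

        printf("first pass dispatches %#lx\n", take_work(&s));  /* both ops */
        printf("second pass dispatches %#lx\n", take_work(&s)); /* nothing  */
        return 0;
}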
384
385static int
386raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
387static int
388raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
389
390static void ops_run_io(struct stripe_head *sh)
391{
392 raid5_conf_t *conf = sh->raid_conf;
393 int i, disks = sh->disks;
394
395 might_sleep();
396
397 for (i = disks; i--; ) {
398 int rw;
399 struct bio *bi;
400 mdk_rdev_t *rdev;
401 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
402 rw = WRITE;
403 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
404 rw = READ;
405 else
406 continue;
407
408 bi = &sh->dev[i].req;
409
410 bi->bi_rw = rw;
411 if (rw == WRITE)
412 bi->bi_end_io = raid5_end_write_request;
413 else
414 bi->bi_end_io = raid5_end_read_request;
415
416 rcu_read_lock();
417 rdev = rcu_dereference(conf->disks[i].rdev);
418 if (rdev && test_bit(Faulty, &rdev->flags))
419 rdev = NULL;
420 if (rdev)
421 atomic_inc(&rdev->nr_pending);
422 rcu_read_unlock();
423
424 if (rdev) {
425 if (test_bit(STRIPE_SYNCING, &sh->state) ||
426 test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
427 test_bit(STRIPE_EXPAND_READY, &sh->state))
428 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
429
430 bi->bi_bdev = rdev->bdev;
431 pr_debug("%s: for %llu schedule op %ld on disc %d\n",
432 __FUNCTION__, (unsigned long long)sh->sector,
433 bi->bi_rw, i);
434 atomic_inc(&sh->count);
435 bi->bi_sector = sh->sector + rdev->data_offset;
436 bi->bi_flags = 1 << BIO_UPTODATE;
437 bi->bi_vcnt = 1;
438 bi->bi_max_vecs = 1;
439 bi->bi_idx = 0;
440 bi->bi_io_vec = &sh->dev[i].vec;
441 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
442 bi->bi_io_vec[0].bv_offset = 0;
443 bi->bi_size = STRIPE_SIZE;
444 bi->bi_next = NULL;
445 if (rw == WRITE &&
446 test_bit(R5_ReWrite, &sh->dev[i].flags))
447 atomic_add(STRIPE_SECTORS,
448 &rdev->corrected_errors);
449 generic_make_request(bi);
450 } else {
451 if (rw == WRITE)
452 set_bit(STRIPE_DEGRADED, &sh->state);
453 pr_debug("skip op %ld on disc %d for sector %llu\n",
454 bi->bi_rw, i, (unsigned long long)sh->sector);
455 clear_bit(R5_LOCKED, &sh->dev[i].flags);
456 set_bit(STRIPE_HANDLE, &sh->state);
457 }
458 }
459}
460
461static struct dma_async_tx_descriptor *
462async_copy_data(int frombio, struct bio *bio, struct page *page,
463 sector_t sector, struct dma_async_tx_descriptor *tx)
464{
465 struct bio_vec *bvl;
466 struct page *bio_page;
467 int i;
468 int page_offset;
469
470 if (bio->bi_sector >= sector)
471 page_offset = (signed)(bio->bi_sector - sector) * 512;
472 else
473 page_offset = (signed)(sector - bio->bi_sector) * -512;
474 bio_for_each_segment(bvl, bio, i) {
475 int len = bio_iovec_idx(bio, i)->bv_len;
476 int clen;
477 int b_offset = 0;
478
479 if (page_offset < 0) {
480 b_offset = -page_offset;
481 page_offset += b_offset;
482 len -= b_offset;
483 }
484
485 if (len > 0 && page_offset + len > STRIPE_SIZE)
486 clen = STRIPE_SIZE - page_offset;
487 else
488 clen = len;
489
490 if (clen > 0) {
491 b_offset += bio_iovec_idx(bio, i)->bv_offset;
492 bio_page = bio_iovec_idx(bio, i)->bv_page;
493 if (frombio)
494 tx = async_memcpy(page, bio_page, page_offset,
495 b_offset, clen,
496 ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC,
497 tx, NULL, NULL);
498 else
499 tx = async_memcpy(bio_page, page, b_offset,
500 page_offset, clen,
501 ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST,
502 tx, NULL, NULL);
503 }
504 if (clen < len) /* hit end of page */
505 break;
506 page_offset += len;
507 }
508
509 return tx;
510}
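
async_copy_data() clips every bio segment against the STRIPE_SIZE window that starts at 'sector', producing a page offset on the stripe side and a byte offset on the bio side. That arithmetic can be exercised on its own; the clip() helper below is an illustration of the calculation, not the driver's code:

#include <stdio.h>

#define STRIPE_SIZE 4096        /* one stripe page, as in the driver */

/*
 * Given a bio segment of 'len' bytes starting at 'bio_sector' and a
 * stripe page covering sectors [sector, sector + STRIPE_SIZE / 512),
 * work out how many bytes land in the page and at which offsets.
 */
static int clip(long long bio_sector, long long sector, int len,
                int *page_offset, int *b_offset)
{
        int clen;

        *b_offset = 0;
        if (bio_sector >= sector)
                *page_offset = (int)(bio_sector - sector) * 512;
        else
                *page_offset = -(int)(sector - bio_sector) * 512;

        if (*page_offset < 0) {         /* segment starts before the page */
                *b_offset = -*page_offset;
                *page_offset = 0;
                len -= *b_offset;
        }
        if (len > 0 && *page_offset + len > STRIPE_SIZE)
                clen = STRIPE_SIZE - *page_offset;  /* runs past the page */
        else
                clen = len;

        return clen > 0 ? clen : 0;
}

int main(void)
{
        int po, bo;
        int clen = clip(0 /* bio starts at sector 0 */,
                        4 /* page starts at sector 4 */, 8192, &po, &bo);

        printf("copy %d bytes from bio offset %d to page offset %d\n",
               clen, bo, po);
        return 0;
}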
511
512static void ops_complete_biofill(void *stripe_head_ref)
513{
514 struct stripe_head *sh = stripe_head_ref;
515 struct bio *return_bi = NULL;
516 raid5_conf_t *conf = sh->raid_conf;
517 int i, more_to_read = 0;
518
519 pr_debug("%s: stripe %llu\n", __FUNCTION__,
520 (unsigned long long)sh->sector);
521
522 /* clear completed biofills */
523 for (i = sh->disks; i--; ) {
524 struct r5dev *dev = &sh->dev[i];
525 /* check if this stripe has new incoming reads */
526 if (dev->toread)
527 more_to_read++;
528
529 /* acknowledge completion of a biofill operation */
530 /* and check if we need to reply to a read request
531 */
532 if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) {
533 struct bio *rbi, *rbi2;
534 clear_bit(R5_Wantfill, &dev->flags);
535
536 /* The access to dev->read is outside of the
537 * spin_lock_irq(&conf->device_lock), but is protected
538 * by the STRIPE_OP_BIOFILL pending bit
539 */
540 BUG_ON(!dev->read);
541 rbi = dev->read;
542 dev->read = NULL;
543 while (rbi && rbi->bi_sector <
544 dev->sector + STRIPE_SECTORS) {
545 rbi2 = r5_next_bio(rbi, dev->sector);
546 spin_lock_irq(&conf->device_lock);
547 if (--rbi->bi_phys_segments == 0) {
548 rbi->bi_next = return_bi;
549 return_bi = rbi;
550 }
551 spin_unlock_irq(&conf->device_lock);
552 rbi = rbi2;
553 }
554 }
555 }
556 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
557 clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
558
559 return_io(return_bi);
560
561 if (more_to_read)
562 set_bit(STRIPE_HANDLE, &sh->state);
563 release_stripe(sh);
564}
565
566static void ops_run_biofill(struct stripe_head *sh)
567{
568 struct dma_async_tx_descriptor *tx = NULL;
569 raid5_conf_t *conf = sh->raid_conf;
570 int i;
571
572 pr_debug("%s: stripe %llu\n", __FUNCTION__,
573 (unsigned long long)sh->sector);
574
575 for (i = sh->disks; i--; ) {
576 struct r5dev *dev = &sh->dev[i];
577 if (test_bit(R5_Wantfill, &dev->flags)) {
578 struct bio *rbi;
579 spin_lock_irq(&conf->device_lock);
580 dev->read = rbi = dev->toread;
581 dev->toread = NULL;
582 spin_unlock_irq(&conf->device_lock);
583 while (rbi && rbi->bi_sector <
584 dev->sector + STRIPE_SECTORS) {
585 tx = async_copy_data(0, rbi, dev->page,
586 dev->sector, tx);
587 rbi = r5_next_bio(rbi, dev->sector);
588 }
589 }
590 }
591
592 atomic_inc(&sh->count);
593 async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
594 ops_complete_biofill, sh);
595}
596
597static void ops_complete_compute5(void *stripe_head_ref)
598{
599 struct stripe_head *sh = stripe_head_ref;
600 int target = sh->ops.target;
601 struct r5dev *tgt = &sh->dev[target];
602
603 pr_debug("%s: stripe %llu\n", __FUNCTION__,
604 (unsigned long long)sh->sector);
605
606 set_bit(R5_UPTODATE, &tgt->flags);
607 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
608 clear_bit(R5_Wantcompute, &tgt->flags);
609 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
610 set_bit(STRIPE_HANDLE, &sh->state);
611 release_stripe(sh);
612}
613
614static struct dma_async_tx_descriptor *
615ops_run_compute5(struct stripe_head *sh, unsigned long pending)
616{
617 /* kernel stack size limits the total number of disks */
618 int disks = sh->disks;
619 struct page *xor_srcs[disks];
620 int target = sh->ops.target;
621 struct r5dev *tgt = &sh->dev[target];
622 struct page *xor_dest = tgt->page;
623 int count = 0;
624 struct dma_async_tx_descriptor *tx;
625 int i;
626
627 pr_debug("%s: stripe %llu block: %d\n",
628 __FUNCTION__, (unsigned long long)sh->sector, target);
629 BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
630
631 for (i = disks; i--; )
632 if (i != target)
633 xor_srcs[count++] = sh->dev[i].page;
634
635 atomic_inc(&sh->count);
636
637 if (unlikely(count == 1))
638 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
639 0, NULL, ops_complete_compute5, sh);
640 else
641 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
642 ASYNC_TX_XOR_ZERO_DST, NULL,
643 ops_complete_compute5, sh);
644
645 /* ack now if postxor is not set to be run */
646 if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
647 async_tx_ack(tx);
648
649 return tx;
650}
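
ops_run_compute5() rebuilds one block as the XOR of every other block in the stripe (data plus parity); with a single source it degenerates to a plain copy. A synchronous user-space illustration of the same identity, using tiny blocks instead of STRIPE_SIZE pages:

#include <stdio.h>
#include <string.h>

#define NBLK  4      /* data blocks in the demo stripe */
#define BLKSZ 16     /* tiny blocks; the driver uses STRIPE_SIZE */

/* dest = XOR of all srcs: enough to rebuild one missing member of a
 * RAID-5 stripe when every other member (including parity) is up to date
 */
static void xor_rebuild(unsigned char *dest,
                        unsigned char srcs[][BLKSZ], int count)
{
        memset(dest, 0, BLKSZ);
        for (int i = 0; i < count; i++)
                for (int j = 0; j < BLKSZ; j++)
                        dest[j] ^= srcs[i][j];
}

int main(void)
{
        unsigned char data[NBLK][BLKSZ], parity[BLKSZ], rebuilt[BLKSZ];
        unsigned char srcs[NBLK][BLKSZ];

        for (int i = 0; i < NBLK; i++)
                memset(data[i], 'a' + i, BLKSZ);

        /* parity over the data blocks */
        xor_rebuild(parity, data, NBLK);

        /* pretend data[2] was lost: xor parity with the survivors */
        memcpy(srcs[0], data[0], BLKSZ);
        memcpy(srcs[1], data[1], BLKSZ);
        memcpy(srcs[2], data[3], BLKSZ);
        memcpy(srcs[3], parity,  BLKSZ);
        xor_rebuild(rebuilt, srcs, NBLK);

        printf("rebuilt block matches: %s\n",
               memcmp(rebuilt, data[2], BLKSZ) == 0 ? "yes" : "no");
        return 0;
}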
651
652static void ops_complete_prexor(void *stripe_head_ref)
653{
654 struct stripe_head *sh = stripe_head_ref;
655
656 pr_debug("%s: stripe %llu\n", __FUNCTION__,
657 (unsigned long long)sh->sector);
658
659 set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
660}
661
662static struct dma_async_tx_descriptor *
663ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
664{
665 /* kernel stack size limits the total number of disks */
666 int disks = sh->disks;
667 struct page *xor_srcs[disks];
668 int count = 0, pd_idx = sh->pd_idx, i;
669
670 /* existing parity data subtracted */
671 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
672
673 pr_debug("%s: stripe %llu\n", __FUNCTION__,
674 (unsigned long long)sh->sector);
675
676 for (i = disks; i--; ) {
677 struct r5dev *dev = &sh->dev[i];
678 /* Only process blocks that are known to be uptodate */
679 if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags))
680 xor_srcs[count++] = dev->page;
681 }
682
683 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
684 ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
685 ops_complete_prexor, sh);
686
687 return tx;
688}
689
690static struct dma_async_tx_descriptor *
691ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
692{
693 int disks = sh->disks;
694 int pd_idx = sh->pd_idx, i;
695
696 /* check if prexor is active which means only process blocks
697 * that are part of a read-modify-write (Wantprexor)
698 */
699 int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
700
701 pr_debug("%s: stripe %llu\n", __FUNCTION__,
702 (unsigned long long)sh->sector);
703
704 for (i = disks; i--; ) {
705 struct r5dev *dev = &sh->dev[i];
706 struct bio *chosen;
707 int towrite;
708
709 towrite = 0;
710 if (prexor) { /* rmw */
711 if (dev->towrite &&
712 test_bit(R5_Wantprexor, &dev->flags))
713 towrite = 1;
714 } else { /* rcw */
715 if (i != pd_idx && dev->towrite &&
716 test_bit(R5_LOCKED, &dev->flags))
717 towrite = 1;
718 }
719
720 if (towrite) {
721 struct bio *wbi;
722
723 spin_lock(&sh->lock);
724 chosen = dev->towrite;
725 dev->towrite = NULL;
726 BUG_ON(dev->written);
727 wbi = dev->written = chosen;
728 spin_unlock(&sh->lock);
729
730 while (wbi && wbi->bi_sector <
731 dev->sector + STRIPE_SECTORS) {
732 tx = async_copy_data(1, wbi, dev->page,
733 dev->sector, tx);
734 wbi = r5_next_bio(wbi, dev->sector);
735 }
736 }
737 }
738
739 return tx;
740}
741
742static void ops_complete_postxor(void *stripe_head_ref)
743{
744 struct stripe_head *sh = stripe_head_ref;
745
746 pr_debug("%s: stripe %llu\n", __FUNCTION__,
747 (unsigned long long)sh->sector);
748
749 set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
750 set_bit(STRIPE_HANDLE, &sh->state);
751 release_stripe(sh);
752}
753
754static void ops_complete_write(void *stripe_head_ref)
755{
756 struct stripe_head *sh = stripe_head_ref;
757 int disks = sh->disks, i, pd_idx = sh->pd_idx;
758
759 pr_debug("%s: stripe %llu\n", __FUNCTION__,
760 (unsigned long long)sh->sector);
761
762 for (i = disks; i--; ) {
763 struct r5dev *dev = &sh->dev[i];
764 if (dev->written || i == pd_idx)
765 set_bit(R5_UPTODATE, &dev->flags);
766 }
767
768 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
769 set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
770
771 set_bit(STRIPE_HANDLE, &sh->state);
772 release_stripe(sh);
773}
774
775static void
776ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
777{
778 /* kernel stack size limits the total number of disks */
779 int disks = sh->disks;
780 struct page *xor_srcs[disks];
781
782 int count = 0, pd_idx = sh->pd_idx, i;
783 struct page *xor_dest;
784 int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
785 unsigned long flags;
786 dma_async_tx_callback callback;
787
788 pr_debug("%s: stripe %llu\n", __FUNCTION__,
789 (unsigned long long)sh->sector);
790
791 /* check if prexor is active which means only process blocks
792 * that are part of a read-modify-write (written)
793 */
794 if (prexor) {
795 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
796 for (i = disks; i--; ) {
797 struct r5dev *dev = &sh->dev[i];
798 if (dev->written)
799 xor_srcs[count++] = dev->page;
800 }
801 } else {
802 xor_dest = sh->dev[pd_idx].page;
803 for (i = disks; i--; ) {
804 struct r5dev *dev = &sh->dev[i];
805 if (i != pd_idx)
806 xor_srcs[count++] = dev->page;
807 }
808 }
809
810 /* check whether this postxor is part of a write */
811 callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
812 ops_complete_write : ops_complete_postxor;
813
814 /* 1/ if we prexor'd then the dest is reused as a source
815 * 2/ if we did not prexor then we are redoing the parity
816 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
817 * for the synchronous xor case
818 */
819 flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
820 (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
821
822 atomic_inc(&sh->count);
823
824 if (unlikely(count == 1)) {
825 flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
826 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
827 flags, tx, callback, sh);
828 } else
829 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
830 flags, tx, callback, sh);
831}
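
Together, ops_run_prexor() and ops_run_postxor() implement the read-modify-write parity update: the old contents of the blocks being rewritten are first XORed out of the parity (ASYNC_TX_XOR_DROP_DST), then the new contents are XORed back in; the reconstruct-write path instead recomputes parity from every data block. Single bytes stand in for STRIPE_SIZE pages in this hedged illustration:

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned char d0 = 0x12, d1 = 0x34, d2 = 0x56;   /* data blocks */
        unsigned char p  = d0 ^ d1 ^ d2;                 /* parity      */

        unsigned char new_d1 = 0x78;

        /* read-modify-write: prexor removes the old data, postxor adds
         * the new data, without touching d0 or d2 at all
         */
        unsigned char p_rmw = p ^ d1 ^ new_d1;

        /* reconstruct-write: recompute parity from every data block */
        unsigned char p_rcw = d0 ^ new_d1 ^ d2;

        assert(p_rmw == p_rcw);
        printf("both paths agree: parity = %#x\n", p_rmw);
        return 0;
}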
832
833static void ops_complete_check(void *stripe_head_ref)
834{
835 struct stripe_head *sh = stripe_head_ref;
836 int pd_idx = sh->pd_idx;
837
838 pr_debug("%s: stripe %llu\n", __FUNCTION__,
839 (unsigned long long)sh->sector);
840
841 if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
842 sh->ops.zero_sum_result == 0)
843 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
844
845 set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
846 set_bit(STRIPE_HANDLE, &sh->state);
847 release_stripe(sh);
848}
849
850static void ops_run_check(struct stripe_head *sh)
851{
852 /* kernel stack size limits the total number of disks */
853 int disks = sh->disks;
854 struct page *xor_srcs[disks];
855 struct dma_async_tx_descriptor *tx;
856
857 int count = 0, pd_idx = sh->pd_idx, i;
858 struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
859
860 pr_debug("%s: stripe %llu\n", __FUNCTION__,
861 (unsigned long long)sh->sector);
862
863 for (i = disks; i--; ) {
864 struct r5dev *dev = &sh->dev[i];
865 if (i != pd_idx)
866 xor_srcs[count++] = dev->page;
867 }
868
869 tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
870 &sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
871
872 if (tx)
873 set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
874 else
875 clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
876
877 atomic_inc(&sh->count);
878 tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
879 ops_complete_check, sh);
880}
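
ops_run_check() asks the xor engine for a "zero sum" across parity and all data blocks: a consistent stripe XORs to all-zero, and any non-zero result flags a mismatch. The synchronous sketch below shows the same test with invented helper names:

#include <stdio.h>
#include <string.h>

#define BLKSZ 16

/* returns 0 when parity ^ d0 ^ d1 ^ ... is all-zero, non-zero otherwise */
static int zero_sum(const unsigned char *parity,
                    unsigned char data[][BLKSZ], int ndata)
{
        unsigned char acc = 0;

        for (int j = 0; j < BLKSZ; j++) {
                unsigned char v = parity[j];

                for (int i = 0; i < ndata; i++)
                        v ^= data[i][j];
                acc |= v;
        }
        return acc;
}

int main(void)
{
        unsigned char data[3][BLKSZ], parity[BLKSZ];

        memset(data[0], 0x11, BLKSZ);
        memset(data[1], 0x22, BLKSZ);
        memset(data[2], 0x44, BLKSZ);
        memset(parity, 0x11 ^ 0x22 ^ 0x44, BLKSZ);

        printf("clean stripe:   zero_sum=%d\n", zero_sum(parity, data, 3));
        parity[0] ^= 1;   /* corrupt one parity byte */
        printf("corrupt stripe: zero_sum=%d\n", zero_sum(parity, data, 3));
        return 0;
}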
881
882static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
883{
884 int overlap_clear = 0, i, disks = sh->disks;
885 struct dma_async_tx_descriptor *tx = NULL;
886
887 if (test_bit(STRIPE_OP_BIOFILL, &pending)) {
888 ops_run_biofill(sh);
889 overlap_clear++;
890 }
891
892 if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
893 tx = ops_run_compute5(sh, pending);
894
895 if (test_bit(STRIPE_OP_PREXOR, &pending))
896 tx = ops_run_prexor(sh, tx);
897
898 if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
899 tx = ops_run_biodrain(sh, tx);
900 overlap_clear++;
901 }
902
903 if (test_bit(STRIPE_OP_POSTXOR, &pending))
904 ops_run_postxor(sh, tx);
905
906 if (test_bit(STRIPE_OP_CHECK, &pending))
907 ops_run_check(sh);
908
909 if (test_bit(STRIPE_OP_IO, &pending))
910 ops_run_io(sh);
911
912 if (overlap_clear)
913 for (i = disks; i--; ) {
914 struct r5dev *dev = &sh->dev[i];
915 if (test_and_clear_bit(R5_Overlap, &dev->flags))
916 wake_up(&sh->raid_conf->wait_for_overlap);
917 }
918}
919
327static int grow_one_stripe(raid5_conf_t *conf) 920static int grow_one_stripe(raid5_conf_t *conf)
328{ 921{
329 struct stripe_head *sh; 922 struct stripe_head *sh;
@@ -537,8 +1130,8 @@ static int raid5_end_read_request(struct bio * bi, unsigned int bytes_done,
537 if (bi == &sh->dev[i].req) 1130 if (bi == &sh->dev[i].req)
538 break; 1131 break;
539 1132
540 PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", 1133 pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
541 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 1134 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
542 uptodate); 1135 uptodate);
543 if (i == disks) { 1136 if (i == disks) {
544 BUG(); 1137 BUG();
@@ -613,7 +1206,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
613 if (bi == &sh->dev[i].req) 1206 if (bi == &sh->dev[i].req)
614 break; 1207 break;
615 1208
616 PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", 1209 pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
617 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 1210 (unsigned long long)sh->sector, i, atomic_read(&sh->count),
618 uptodate); 1211 uptodate);
619 if (i == disks) { 1212 if (i == disks) {
@@ -658,7 +1251,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
658{ 1251{
659 char b[BDEVNAME_SIZE]; 1252 char b[BDEVNAME_SIZE];
660 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 1253 raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
661 PRINTK("raid5: error called\n"); 1254 pr_debug("raid5: error called\n");
662 1255
663 if (!test_bit(Faulty, &rdev->flags)) { 1256 if (!test_bit(Faulty, &rdev->flags)) {
664 set_bit(MD_CHANGE_DEVS, &mddev->flags); 1257 set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -916,137 +1509,13 @@ static void copy_data(int frombio, struct bio *bio,
916 } 1509 }
917} 1510}
918 1511
919#define check_xor() do { \ 1512#define check_xor() do { \
920 if (count == MAX_XOR_BLOCKS) { \ 1513 if (count == MAX_XOR_BLOCKS) { \
921 xor_block(count, STRIPE_SIZE, ptr); \ 1514 xor_blocks(count, STRIPE_SIZE, dest, ptr);\
922 count = 1; \ 1515 count = 0; \
923 } \ 1516 } \
924 } while(0) 1517 } while(0)
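
The reworked check_xor() flushes a batch of at most MAX_XOR_BLOCKS sources into xor_blocks(), which XORs them into a separate dest buffer instead of into ptr[0] as the old xor_block() did; that is why count now resets to 0 rather than 1. A user-space analogue of the batching loop, with xor_blocks_demo() standing in for the library routine:

#include <stdio.h>
#include <string.h>

#define MAX_XOR_BLOCKS 4
#define BLKSZ 8

/* stand-in for the library routine: dest ^= src[0] ^ ... ^ src[count-1] */
static void xor_blocks_demo(int count, int bytes, unsigned char *dest,
                            unsigned char **srcs)
{
        for (int i = 0; i < count; i++)
                for (int j = 0; j < bytes; j++)
                        dest[j] ^= srcs[i][j];
}

int main(void)
{
        unsigned char blocks[6][BLKSZ], dest[BLKSZ] = { 0 };
        unsigned char *ptr[MAX_XOR_BLOCKS];
        int count = 0;

        for (int i = 0; i < 6; i++)
                memset(blocks[i], 1 << i, BLKSZ);

        /* accumulate sources, flushing whenever the batch is full */
        for (int i = 0; i < 6; i++) {
                ptr[count++] = blocks[i];
                if (count == MAX_XOR_BLOCKS) {
                        xor_blocks_demo(count, BLKSZ, dest, ptr);
                        count = 0;
                }
        }
        if (count)
                xor_blocks_demo(count, BLKSZ, dest, ptr);

        printf("xor of six blocks: %#x\n", dest[0]);   /* 0x3f */
        return 0;
}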
925 1518
926
927static void compute_block(struct stripe_head *sh, int dd_idx)
928{
929 int i, count, disks = sh->disks;
930 void *ptr[MAX_XOR_BLOCKS], *p;
931
932 PRINTK("compute_block, stripe %llu, idx %d\n",
933 (unsigned long long)sh->sector, dd_idx);
934
935 ptr[0] = page_address(sh->dev[dd_idx].page);
936 memset(ptr[0], 0, STRIPE_SIZE);
937 count = 1;
938 for (i = disks ; i--; ) {
939 if (i == dd_idx)
940 continue;
941 p = page_address(sh->dev[i].page);
942 if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
943 ptr[count++] = p;
944 else
945 printk(KERN_ERR "compute_block() %d, stripe %llu, %d"
946 " not present\n", dd_idx,
947 (unsigned long long)sh->sector, i);
948
949 check_xor();
950 }
951 if (count != 1)
952 xor_block(count, STRIPE_SIZE, ptr);
953 set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
954}
955
956static void compute_parity5(struct stripe_head *sh, int method)
957{
958 raid5_conf_t *conf = sh->raid_conf;
959 int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
960 void *ptr[MAX_XOR_BLOCKS];
961 struct bio *chosen;
962
963 PRINTK("compute_parity5, stripe %llu, method %d\n",
964 (unsigned long long)sh->sector, method);
965
966 count = 1;
967 ptr[0] = page_address(sh->dev[pd_idx].page);
968 switch(method) {
969 case READ_MODIFY_WRITE:
970 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags));
971 for (i=disks ; i-- ;) {
972 if (i==pd_idx)
973 continue;
974 if (sh->dev[i].towrite &&
975 test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
976 ptr[count++] = page_address(sh->dev[i].page);
977 chosen = sh->dev[i].towrite;
978 sh->dev[i].towrite = NULL;
979
980 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
981 wake_up(&conf->wait_for_overlap);
982
983 BUG_ON(sh->dev[i].written);
984 sh->dev[i].written = chosen;
985 check_xor();
986 }
987 }
988 break;
989 case RECONSTRUCT_WRITE:
990 memset(ptr[0], 0, STRIPE_SIZE);
991 for (i= disks; i-- ;)
992 if (i!=pd_idx && sh->dev[i].towrite) {
993 chosen = sh->dev[i].towrite;
994 sh->dev[i].towrite = NULL;
995
996 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
997 wake_up(&conf->wait_for_overlap);
998
999 BUG_ON(sh->dev[i].written);
1000 sh->dev[i].written = chosen;
1001 }
1002 break;
1003 case CHECK_PARITY:
1004 break;
1005 }
1006 if (count>1) {
1007 xor_block(count, STRIPE_SIZE, ptr);
1008 count = 1;
1009 }
1010
1011 for (i = disks; i--;)
1012 if (sh->dev[i].written) {
1013 sector_t sector = sh->dev[i].sector;
1014 struct bio *wbi = sh->dev[i].written;
1015 while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
1016 copy_data(1, wbi, sh->dev[i].page, sector);
1017 wbi = r5_next_bio(wbi, sector);
1018 }
1019
1020 set_bit(R5_LOCKED, &sh->dev[i].flags);
1021 set_bit(R5_UPTODATE, &sh->dev[i].flags);
1022 }
1023
1024 switch(method) {
1025 case RECONSTRUCT_WRITE:
1026 case CHECK_PARITY:
1027 for (i=disks; i--;)
1028 if (i != pd_idx) {
1029 ptr[count++] = page_address(sh->dev[i].page);
1030 check_xor();
1031 }
1032 break;
1033 case READ_MODIFY_WRITE:
1034 for (i = disks; i--;)
1035 if (sh->dev[i].written) {
1036 ptr[count++] = page_address(sh->dev[i].page);
1037 check_xor();
1038 }
1039 }
1040 if (count != 1)
1041 xor_block(count, STRIPE_SIZE, ptr);
1042
1043 if (method != CHECK_PARITY) {
1044 set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1045 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1046 } else
1047 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1048}
1049
1050static void compute_parity6(struct stripe_head *sh, int method) 1519static void compute_parity6(struct stripe_head *sh, int method)
1051{ 1520{
1052 raid6_conf_t *conf = sh->raid_conf; 1521 raid6_conf_t *conf = sh->raid_conf;
@@ -1058,7 +1527,7 @@ static void compute_parity6(struct stripe_head *sh, int method)
1058 qd_idx = raid6_next_disk(pd_idx, disks); 1527 qd_idx = raid6_next_disk(pd_idx, disks);
1059 d0_idx = raid6_next_disk(qd_idx, disks); 1528 d0_idx = raid6_next_disk(qd_idx, disks);
1060 1529
1061 PRINTK("compute_parity, stripe %llu, method %d\n", 1530 pr_debug("compute_parity, stripe %llu, method %d\n",
1062 (unsigned long long)sh->sector, method); 1531 (unsigned long long)sh->sector, method);
1063 1532
1064 switch(method) { 1533 switch(method) {
@@ -1132,20 +1601,20 @@ static void compute_parity6(struct stripe_head *sh, int method)
1132static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) 1601static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1133{ 1602{
1134 int i, count, disks = sh->disks; 1603 int i, count, disks = sh->disks;
1135 void *ptr[MAX_XOR_BLOCKS], *p; 1604 void *ptr[MAX_XOR_BLOCKS], *dest, *p;
1136 int pd_idx = sh->pd_idx; 1605 int pd_idx = sh->pd_idx;
1137 int qd_idx = raid6_next_disk(pd_idx, disks); 1606 int qd_idx = raid6_next_disk(pd_idx, disks);
1138 1607
1139 PRINTK("compute_block_1, stripe %llu, idx %d\n", 1608 pr_debug("compute_block_1, stripe %llu, idx %d\n",
1140 (unsigned long long)sh->sector, dd_idx); 1609 (unsigned long long)sh->sector, dd_idx);
1141 1610
1142 if ( dd_idx == qd_idx ) { 1611 if ( dd_idx == qd_idx ) {
1143 /* We're actually computing the Q drive */ 1612 /* We're actually computing the Q drive */
1144 compute_parity6(sh, UPDATE_PARITY); 1613 compute_parity6(sh, UPDATE_PARITY);
1145 } else { 1614 } else {
1146 ptr[0] = page_address(sh->dev[dd_idx].page); 1615 dest = page_address(sh->dev[dd_idx].page);
1147 if (!nozero) memset(ptr[0], 0, STRIPE_SIZE); 1616 if (!nozero) memset(dest, 0, STRIPE_SIZE);
1148 count = 1; 1617 count = 0;
1149 for (i = disks ; i--; ) { 1618 for (i = disks ; i--; ) {
1150 if (i == dd_idx || i == qd_idx) 1619 if (i == dd_idx || i == qd_idx)
1151 continue; 1620 continue;
@@ -1159,8 +1628,8 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
1159 1628
1160 check_xor(); 1629 check_xor();
1161 } 1630 }
1162 if (count != 1) 1631 if (count)
1163 xor_block(count, STRIPE_SIZE, ptr); 1632 xor_blocks(count, STRIPE_SIZE, dest, ptr);
1164 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 1633 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1165 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 1634 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
1166 } 1635 }
@@ -1183,7 +1652,7 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1183 BUG_ON(faila == failb); 1652 BUG_ON(faila == failb);
1184 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } 1653 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
1185 1654
1186 PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", 1655 pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
1187 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); 1656 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
1188 1657
1189 if ( failb == disks-1 ) { 1658 if ( failb == disks-1 ) {
@@ -1229,7 +1698,79 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
1229 } 1698 }
1230} 1699}
1231 1700
1701static int
1702handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
1703{
1704 int i, pd_idx = sh->pd_idx, disks = sh->disks;
1705 int locked = 0;
1232 1706
1707 if (rcw) {
1708 /* if we are not expanding this is a proper write request, and
1709 * there will be bios with new data to be drained into the
1710 * stripe cache
1711 */
1712 if (!expand) {
1713 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
1714 sh->ops.count++;
1715 }
1716
1717 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
1718 sh->ops.count++;
1719
1720 for (i = disks; i--; ) {
1721 struct r5dev *dev = &sh->dev[i];
1722
1723 if (dev->towrite) {
1724 set_bit(R5_LOCKED, &dev->flags);
1725 if (!expand)
1726 clear_bit(R5_UPTODATE, &dev->flags);
1727 locked++;
1728 }
1729 }
1730 } else {
1731 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
1732 test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
1733
1734 set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
1735 set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
1736 set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
1737
1738 sh->ops.count += 3;
1739
1740 for (i = disks; i--; ) {
1741 struct r5dev *dev = &sh->dev[i];
1742 if (i == pd_idx)
1743 continue;
1744
1745 /* For a read-modify write there may be blocks that are
1746 * locked for reading while others are ready to be
1747 * written so we distinguish these blocks by the
1748 * R5_Wantprexor bit
1749 */
1750 if (dev->towrite &&
1751 (test_bit(R5_UPTODATE, &dev->flags) ||
1752 test_bit(R5_Wantcompute, &dev->flags))) {
1753 set_bit(R5_Wantprexor, &dev->flags);
1754 set_bit(R5_LOCKED, &dev->flags);
1755 clear_bit(R5_UPTODATE, &dev->flags);
1756 locked++;
1757 }
1758 }
1759 }
1760
1761 /* keep the parity disk locked while asynchronous operations
1762 * are in flight
1763 */
1764 set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
1765 clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
1766 locked++;
1767
1768 pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
1769 __FUNCTION__, (unsigned long long)sh->sector,
1770 locked, sh->ops.pending);
1771
1772 return locked;
1773}
1233 1774
1234/* 1775/*
1235 * Each stripe/dev can have one or more bion attached. 1776 * Each stripe/dev can have one or more bion attached.
@@ -1242,7 +1783,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1242 raid5_conf_t *conf = sh->raid_conf; 1783 raid5_conf_t *conf = sh->raid_conf;
1243 int firstwrite=0; 1784 int firstwrite=0;
1244 1785
1245 PRINTK("adding bh b#%llu to stripe s#%llu\n", 1786 pr_debug("adding bh b#%llu to stripe s#%llu\n",
1246 (unsigned long long)bi->bi_sector, 1787 (unsigned long long)bi->bi_sector,
1247 (unsigned long long)sh->sector); 1788 (unsigned long long)sh->sector);
1248 1789
@@ -1271,7 +1812,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1271 spin_unlock_irq(&conf->device_lock); 1812 spin_unlock_irq(&conf->device_lock);
1272 spin_unlock(&sh->lock); 1813 spin_unlock(&sh->lock);
1273 1814
1274 PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n", 1815 pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
1275 (unsigned long long)bi->bi_sector, 1816 (unsigned long long)bi->bi_sector,
1276 (unsigned long long)sh->sector, dd_idx); 1817 (unsigned long long)sh->sector, dd_idx);
1277 1818
@@ -1326,6 +1867,729 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
1326 return pd_idx; 1867 return pd_idx;
1327} 1868}
1328 1869
1870static void
1871handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
1872 struct stripe_head_state *s, int disks,
1873 struct bio **return_bi)
1874{
1875 int i;
1876 for (i = disks; i--; ) {
1877 struct bio *bi;
1878 int bitmap_end = 0;
1879
1880 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1881 mdk_rdev_t *rdev;
1882 rcu_read_lock();
1883 rdev = rcu_dereference(conf->disks[i].rdev);
1884 if (rdev && test_bit(In_sync, &rdev->flags))
1885 /* multiple read failures in one stripe */
1886 md_error(conf->mddev, rdev);
1887 rcu_read_unlock();
1888 }
1889 spin_lock_irq(&conf->device_lock);
1890 /* fail all writes first */
1891 bi = sh->dev[i].towrite;
1892 sh->dev[i].towrite = NULL;
1893 if (bi) {
1894 s->to_write--;
1895 bitmap_end = 1;
1896 }
1897
1898 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1899 wake_up(&conf->wait_for_overlap);
1900
1901 while (bi && bi->bi_sector <
1902 sh->dev[i].sector + STRIPE_SECTORS) {
1903 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1904 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1905 if (--bi->bi_phys_segments == 0) {
1906 md_write_end(conf->mddev);
1907 bi->bi_next = *return_bi;
1908 *return_bi = bi;
1909 }
1910 bi = nextbi;
1911 }
1912 /* and fail all 'written' */
1913 bi = sh->dev[i].written;
1914 sh->dev[i].written = NULL;
1915 if (bi) bitmap_end = 1;
1916 while (bi && bi->bi_sector <
1917 sh->dev[i].sector + STRIPE_SECTORS) {
1918 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1919 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1920 if (--bi->bi_phys_segments == 0) {
1921 md_write_end(conf->mddev);
1922 bi->bi_next = *return_bi;
1923 *return_bi = bi;
1924 }
1925 bi = bi2;
1926 }
1927
1928 /* fail any reads if this device is non-operational and
1929 * the data has not reached the cache yet.
1930 */
1931 if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
1932 (!test_bit(R5_Insync, &sh->dev[i].flags) ||
1933 test_bit(R5_ReadError, &sh->dev[i].flags))) {
1934 bi = sh->dev[i].toread;
1935 sh->dev[i].toread = NULL;
1936 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1937 wake_up(&conf->wait_for_overlap);
1938 if (bi) s->to_read--;
1939 while (bi && bi->bi_sector <
1940 sh->dev[i].sector + STRIPE_SECTORS) {
1941 struct bio *nextbi =
1942 r5_next_bio(bi, sh->dev[i].sector);
1943 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1944 if (--bi->bi_phys_segments == 0) {
1945 bi->bi_next = *return_bi;
1946 *return_bi = bi;
1947 }
1948 bi = nextbi;
1949 }
1950 }
1951 spin_unlock_irq(&conf->device_lock);
1952 if (bitmap_end)
1953 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
1954 STRIPE_SECTORS, 0, 0);
1955 }
1956
1957}
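
In the failure path above (as in the normal completion paths), a bio that spans several stripes is only handed back to the caller once the last stripe drops its reference, which is what the --bi->bi_phys_segments == 0 test implements. A small sketch of that reference-count pattern, again with an invented fake_bio type:

#include <stdio.h>

struct fake_bio {
        int refs;    /* stands in for bi_phys_segments */
        int id;
};

/* each stripe that finishes with the bio drops one reference; the bio
 * is handed back to the caller only when the last reference goes away
 */
static void put_bio(struct fake_bio *b)
{
        if (--b->refs == 0)
                printf("bio %d returned to caller\n", b->id);
        else
                printf("bio %d still has %d stripes outstanding\n",
                       b->id, b->refs);
}

int main(void)
{
        struct fake_bio bio = { .refs = 3, .id = 7 };  /* spans 3 stripes */

        put_bio(&bio);
        put_bio(&bio);
        put_bio(&bio);
        return 0;
}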
1958
1959/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
1960 * to process
1961 */
1962static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
1963 struct stripe_head_state *s, int disk_idx, int disks)
1964{
1965 struct r5dev *dev = &sh->dev[disk_idx];
1966 struct r5dev *failed_dev = &sh->dev[s->failed_num];
1967
1968 /* don't schedule compute operations or reads on the parity block while
1969 * a check is in flight
1970 */
1971 if ((disk_idx == sh->pd_idx) &&
1972 test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
1973 return ~0;
1974
1975 /* is the data in this block needed, and can we get it? */
1976 if (!test_bit(R5_LOCKED, &dev->flags) &&
1977 !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
1978 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
1979 s->syncing || s->expanding || (s->failed &&
1980 (failed_dev->toread || (failed_dev->towrite &&
1981 !test_bit(R5_OVERWRITE, &failed_dev->flags)
1982 ))))) {
1983 /* 1/ We would like to get this block, possibly by computing it,
1984 * but we might not be able to.
1985 *
1986 * 2/ Since parity check operations potentially make the parity
1987 * block !uptodate it will need to be refreshed before any
1988 * compute operations on data disks are scheduled.
1989 *
1990 * 3/ We hold off parity block re-reads until check operations
1991 * have quiesced.
1992 */
1993 if ((s->uptodate == disks - 1) &&
1994 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
1995 set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
1996 set_bit(R5_Wantcompute, &dev->flags);
1997 sh->ops.target = disk_idx;
1998 s->req_compute = 1;
1999 sh->ops.count++;
2000 /* Careful: from this point on 'uptodate' is in the eye
2001 * of raid5_run_ops which services 'compute' operations
2002 * before writes. R5_Wantcompute flags a block that will
2003 * be R5_UPTODATE by the time it is needed for a
2004 * subsequent operation.
2005 */
2006 s->uptodate++;
2007 return 0; /* uptodate + compute == disks */
2008 } else if ((s->uptodate < disks - 1) &&
2009 test_bit(R5_Insync, &dev->flags)) {
2010 /* Note: we hold off compute operations while checks are
2011 * in flight, but we still prefer 'compute' over 'read'
2012 * hence we only read if (uptodate < * disks-1)
2013 */
2014 set_bit(R5_LOCKED, &dev->flags);
2015 set_bit(R5_Wantread, &dev->flags);
2016 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2017 sh->ops.count++;
2018 s->locked++;
2019 pr_debug("Reading block %d (sync=%d)\n", disk_idx,
2020 s->syncing);
2021 }
2022 }
2023
2024 return ~0;
2025}
2026
2027static void handle_issuing_new_read_requests5(struct stripe_head *sh,
2028 struct stripe_head_state *s, int disks)
2029{
2030 int i;
2031
2032 /* Clear completed compute operations. Parity recovery
2033 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
2034 * later on in this routine
2035 */
2036 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2037 !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2038 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2039 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2040 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2041 }
2042
2043 /* look for blocks to read/compute, skip this if a compute
2044 * is already in flight, or if the stripe contents are in the
2045 * midst of changing due to a write
2046 */
2047 if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
2048 !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
2049 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
2050 for (i = disks; i--; )
2051 if (__handle_issuing_new_read_requests5(
2052 sh, s, i, disks) == 0)
2053 break;
2054 }
2055 set_bit(STRIPE_HANDLE, &sh->state);
2056}
2057
2058static void handle_issuing_new_read_requests6(struct stripe_head *sh,
2059 struct stripe_head_state *s, struct r6_state *r6s,
2060 int disks)
2061{
2062 int i;
2063 for (i = disks; i--; ) {
2064 struct r5dev *dev = &sh->dev[i];
2065 if (!test_bit(R5_LOCKED, &dev->flags) &&
2066 !test_bit(R5_UPTODATE, &dev->flags) &&
2067 (dev->toread || (dev->towrite &&
2068 !test_bit(R5_OVERWRITE, &dev->flags)) ||
2069 s->syncing || s->expanding ||
2070 (s->failed >= 1 &&
2071 (sh->dev[r6s->failed_num[0]].toread ||
2072 s->to_write)) ||
2073 (s->failed >= 2 &&
2074 (sh->dev[r6s->failed_num[1]].toread ||
2075 s->to_write)))) {
2076 /* we would like to get this block, possibly
2077 * by computing it, but we might not be able to
2078 */
2079 if (s->uptodate == disks-1) {
2080 pr_debug("Computing stripe %llu block %d\n",
2081 (unsigned long long)sh->sector, i);
2082 compute_block_1(sh, i, 0);
2083 s->uptodate++;
2084 } else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
2085 /* Computing 2-failure is *very* expensive; only
2086 * do it if failed >= 2
2087 */
2088 int other;
2089 for (other = disks; other--; ) {
2090 if (other == i)
2091 continue;
2092 if (!test_bit(R5_UPTODATE,
2093 &sh->dev[other].flags))
2094 break;
2095 }
2096 BUG_ON(other < 0);
2097 pr_debug("Computing stripe %llu blocks %d,%d\n",
2098 (unsigned long long)sh->sector,
2099 i, other);
2100 compute_block_2(sh, i, other);
2101 s->uptodate += 2;
2102 } else if (test_bit(R5_Insync, &dev->flags)) {
2103 set_bit(R5_LOCKED, &dev->flags);
2104 set_bit(R5_Wantread, &dev->flags);
2105 s->locked++;
2106 pr_debug("Reading block %d (sync=%d)\n",
2107 i, s->syncing);
2108 }
2109 }
2110 }
2111 set_bit(STRIPE_HANDLE, &sh->state);
2112}
2113
2114
2115/* handle_completed_write_requests
2116 * any written block on an uptodate or failed drive can be returned.
2117 * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
2118 * never LOCKED, so we don't need to test 'failed' directly.
2119 */
2120static void handle_completed_write_requests(raid5_conf_t *conf,
2121 struct stripe_head *sh, int disks, struct bio **return_bi)
2122{
2123 int i;
2124 struct r5dev *dev;
2125
2126 for (i = disks; i--; )
2127 if (sh->dev[i].written) {
2128 dev = &sh->dev[i];
2129 if (!test_bit(R5_LOCKED, &dev->flags) &&
2130 test_bit(R5_UPTODATE, &dev->flags)) {
2131 /* We can return any write requests */
2132 struct bio *wbi, *wbi2;
2133 int bitmap_end = 0;
2134 pr_debug("Return write for disc %d\n", i);
2135 spin_lock_irq(&conf->device_lock);
2136 wbi = dev->written;
2137 dev->written = NULL;
2138 while (wbi && wbi->bi_sector <
2139 dev->sector + STRIPE_SECTORS) {
2140 wbi2 = r5_next_bio(wbi, dev->sector);
2141 if (--wbi->bi_phys_segments == 0) {
2142 md_write_end(conf->mddev);
2143 wbi->bi_next = *return_bi;
2144 *return_bi = wbi;
2145 }
2146 wbi = wbi2;
2147 }
2148 if (dev->towrite == NULL)
2149 bitmap_end = 1;
2150 spin_unlock_irq(&conf->device_lock);
2151 if (bitmap_end)
2152 bitmap_endwrite(conf->mddev->bitmap,
2153 sh->sector,
2154 STRIPE_SECTORS,
2155 !test_bit(STRIPE_DEGRADED, &sh->state),
2156 0);
2157 }
2158 }
2159}
2160
2161static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
2162 struct stripe_head *sh, struct stripe_head_state *s, int disks)
2163{
2164 int rmw = 0, rcw = 0, i;
2165 for (i = disks; i--; ) {
2166 /* would I have to read this buffer for read_modify_write */
2167 struct r5dev *dev = &sh->dev[i];
2168 if ((dev->towrite || i == sh->pd_idx) &&
2169 !test_bit(R5_LOCKED, &dev->flags) &&
2170 !(test_bit(R5_UPTODATE, &dev->flags) ||
2171 test_bit(R5_Wantcompute, &dev->flags))) {
2172 if (test_bit(R5_Insync, &dev->flags))
2173 rmw++;
2174 else
2175 rmw += 2*disks; /* cannot read it */
2176 }
2177 /* Would I have to read this buffer for reconstruct_write */
2178 if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
2179 !test_bit(R5_LOCKED, &dev->flags) &&
2180 !(test_bit(R5_UPTODATE, &dev->flags) ||
2181 test_bit(R5_Wantcompute, &dev->flags))) {
2182 if (test_bit(R5_Insync, &dev->flags)) rcw++;
2183 else
2184 rcw += 2*disks;
2185 }
2186 }
2187 pr_debug("for sector %llu, rmw=%d rcw=%d\n",
2188 (unsigned long long)sh->sector, rmw, rcw);
2189 set_bit(STRIPE_HANDLE, &sh->state);
2190 if (rmw < rcw && rmw > 0)
2191 /* prefer read-modify-write, but need to get some data */
2192 for (i = disks; i--; ) {
2193 struct r5dev *dev = &sh->dev[i];
2194 if ((dev->towrite || i == sh->pd_idx) &&
2195 !test_bit(R5_LOCKED, &dev->flags) &&
2196 !(test_bit(R5_UPTODATE, &dev->flags) ||
2197 test_bit(R5_Wantcompute, &dev->flags)) &&
2198 test_bit(R5_Insync, &dev->flags)) {
2199 if (
2200 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2201 pr_debug("Read_old block "
2202 "%d for r-m-w\n", i);
2203 set_bit(R5_LOCKED, &dev->flags);
2204 set_bit(R5_Wantread, &dev->flags);
2205 if (!test_and_set_bit(
2206 STRIPE_OP_IO, &sh->ops.pending))
2207 sh->ops.count++;
2208 s->locked++;
2209 } else {
2210 set_bit(STRIPE_DELAYED, &sh->state);
2211 set_bit(STRIPE_HANDLE, &sh->state);
2212 }
2213 }
2214 }
2215 if (rcw <= rmw && rcw > 0)
2216 /* want reconstruct write, but need to get some data */
2217 for (i = disks; i--; ) {
2218 struct r5dev *dev = &sh->dev[i];
2219 if (!test_bit(R5_OVERWRITE, &dev->flags) &&
2220 i != sh->pd_idx &&
2221 !test_bit(R5_LOCKED, &dev->flags) &&
2222 !(test_bit(R5_UPTODATE, &dev->flags) ||
2223 test_bit(R5_Wantcompute, &dev->flags)) &&
2224 test_bit(R5_Insync, &dev->flags)) {
2225 if (
2226 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2227 pr_debug("Read_old block "
2228 "%d for Reconstruct\n", i);
2229 set_bit(R5_LOCKED, &dev->flags);
2230 set_bit(R5_Wantread, &dev->flags);
2231 if (!test_and_set_bit(
2232 STRIPE_OP_IO, &sh->ops.pending))
2233 sh->ops.count++;
2234 s->locked++;
2235 } else {
2236 set_bit(STRIPE_DELAYED, &sh->state);
2237 set_bit(STRIPE_HANDLE, &sh->state);
2238 }
2239 }
2240 }
2241 /* now if nothing is locked, and if we have enough data,
2242 * we can start a write request
2243 */
2244 /* since handle_stripe can be called at any time we need to handle the
2245 * case where a compute block operation has been submitted and then a
2246 * subsequent call wants to start a write request. raid5_run_ops only
2247 * handles the case where compute block and postxor are requested
2248 * simultaneously. If this is not the case then new writes need to be
2249 * held off until the compute completes.
2250 */
2251 if ((s->req_compute ||
2252 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
2253 (s->locked == 0 && (rcw == 0 || rmw == 0) &&
2254 !test_bit(STRIPE_BIT_DELAY, &sh->state)))
2255 s->locked += handle_write_operations5(sh, rcw == 0, 0);
2256}
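
handle_issuing_new_write_requests5() counts how many reads each strategy would need — read-modify-write must read the blocks being rewritten plus parity, reconstruct-write must read everything it will not overwrite — and schedules pre-reads for the cheaper one. The sketch below keeps only that counting idea; the R5_OVERWRITE, R5_Wantcompute and R5_Insync refinements in the real code are deliberately left out:

#include <stdio.h>

/* per-device state for the demo: what the caller wants to write and
 * what is already up to date in the stripe cache
 */
struct dev_demo {
        int towrite;
        int uptodate;
};

static const char *choose_write_path(struct dev_demo *dev, int disks,
                                     int pd_idx)
{
        int rmw = 0, rcw = 0;

        for (int i = 0; i < disks; i++) {
                /* would this block have to be read for read-modify-write? */
                if ((dev[i].towrite || i == pd_idx) && !dev[i].uptodate)
                        rmw++;
                /* would it have to be read for reconstruct-write? */
                if (i != pd_idx && !dev[i].towrite && !dev[i].uptodate)
                        rcw++;
        }
        printf("rmw=%d rcw=%d\n", rmw, rcw);
        return (rmw < rcw) ? "read-modify-write" : "reconstruct-write";
}

int main(void)
{
        /* 5-disk stripe, parity on disk 4, one small write to disk 0 */
        struct dev_demo dev[5] = {
                { .towrite = 1 }, { 0 }, { 0 }, { 0 }, { 0 /* parity */ }
        };

        printf("chosen: %s\n", choose_write_path(dev, 5, 4));
        return 0;
}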
2257
2258static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
2259 struct stripe_head *sh, struct stripe_head_state *s,
2260 struct r6_state *r6s, int disks)
2261{
2262 int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
2263 int qd_idx = r6s->qd_idx;
2264 for (i = disks; i--; ) {
2265 struct r5dev *dev = &sh->dev[i];
2266 /* Would I have to read this buffer for reconstruct_write */
2267 if (!test_bit(R5_OVERWRITE, &dev->flags)
2268 && i != pd_idx && i != qd_idx
2269 && (!test_bit(R5_LOCKED, &dev->flags)
2270 ) &&
2271 !test_bit(R5_UPTODATE, &dev->flags)) {
2272 if (test_bit(R5_Insync, &dev->flags)) rcw++;
2273 else {
2274 pr_debug("raid6: must_compute: "
2275 "disk %d flags=%#lx\n", i, dev->flags);
2276 must_compute++;
2277 }
2278 }
2279 }
2280 pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
2281 (unsigned long long)sh->sector, rcw, must_compute);
2282 set_bit(STRIPE_HANDLE, &sh->state);
2283
2284 if (rcw > 0)
2285 /* want reconstruct write, but need to get some data */
2286 for (i = disks; i--; ) {
2287 struct r5dev *dev = &sh->dev[i];
2288 if (!test_bit(R5_OVERWRITE, &dev->flags)
2289 && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
2290 && !test_bit(R5_LOCKED, &dev->flags) &&
2291 !test_bit(R5_UPTODATE, &dev->flags) &&
2292 test_bit(R5_Insync, &dev->flags)) {
2293 if (
2294 test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2295 pr_debug("Read_old stripe %llu "
2296 "block %d for Reconstruct\n",
2297 (unsigned long long)sh->sector, i);
2298 set_bit(R5_LOCKED, &dev->flags);
2299 set_bit(R5_Wantread, &dev->flags);
2300 s->locked++;
2301 } else {
2302 pr_debug("Request delayed stripe %llu "
2303 "block %d for Reconstruct\n",
2304 (unsigned long long)sh->sector, i);
2305 set_bit(STRIPE_DELAYED, &sh->state);
2306 set_bit(STRIPE_HANDLE, &sh->state);
2307 }
2308 }
2309 }
2310 /* now if nothing is locked, and if we have enough data, we can start a
2311 * write request
2312 */
2313 if (s->locked == 0 && rcw == 0 &&
2314 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2315 if (must_compute > 0) {
2316 /* We have failed blocks and need to compute them */
2317 switch (s->failed) {
2318 case 0:
2319 BUG();
2320 case 1:
2321 compute_block_1(sh, r6s->failed_num[0], 0);
2322 break;
2323 case 2:
2324 compute_block_2(sh, r6s->failed_num[0],
2325 r6s->failed_num[1]);
2326 break;
2327 default: /* This request should have been failed? */
2328 BUG();
2329 }
2330 }
2331
2332 pr_debug("Computing parity for stripe %llu\n",
2333 (unsigned long long)sh->sector);
2334 compute_parity6(sh, RECONSTRUCT_WRITE);
2335 /* now every locked buffer is ready to be written */
2336 for (i = disks; i--; )
2337 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2338 pr_debug("Writing stripe %llu block %d\n",
2339 (unsigned long long)sh->sector, i);
2340 s->locked++;
2341 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2342 }
2343 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2344 set_bit(STRIPE_INSYNC, &sh->state);
2345
2346 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2347 atomic_dec(&conf->preread_active_stripes);
2348 if (atomic_read(&conf->preread_active_stripes) <
2349 IO_THRESHOLD)
2350 md_wakeup_thread(conf->mddev->thread);
2351 }
2352 }
2353}
2354
2355static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
2356 struct stripe_head_state *s, int disks)
2357{
2358 set_bit(STRIPE_HANDLE, &sh->state);
2359 /* Take one of the following actions:
2360 * 1/ start a check parity operation if (uptodate == disks)
2361 * 2/ finish a check parity operation and act on the result
2362 * 3/ skip to the writeback section if we previously
2363 * initiated a recovery operation
2364 */
2365 if (s->failed == 0 &&
2366 !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2367 if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
2368 BUG_ON(s->uptodate != disks);
2369 clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
2370 sh->ops.count++;
2371 s->uptodate--;
2372 } else if (
2373 test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
2374 clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
2375 clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
2376
2377 if (sh->ops.zero_sum_result == 0)
2378 /* parity is correct (on disc,
2379 * not in buffer any more)
2380 */
2381 set_bit(STRIPE_INSYNC, &sh->state);
2382 else {
2383 conf->mddev->resync_mismatches +=
2384 STRIPE_SECTORS;
2385 if (test_bit(
2386 MD_RECOVERY_CHECK, &conf->mddev->recovery))
2387 /* don't try to repair!! */
2388 set_bit(STRIPE_INSYNC, &sh->state);
2389 else {
2390 set_bit(STRIPE_OP_COMPUTE_BLK,
2391 &sh->ops.pending);
2392 set_bit(STRIPE_OP_MOD_REPAIR_PD,
2393 &sh->ops.pending);
2394 set_bit(R5_Wantcompute,
2395 &sh->dev[sh->pd_idx].flags);
2396 sh->ops.target = sh->pd_idx;
2397 sh->ops.count++;
2398 s->uptodate++;
2399 }
2400 }
2401 }
2402 }
2403
2404 /* check if we can clear a parity disk reconstruct */
2405 if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
2406 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
2407
2408 clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
2409 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
2410 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
2411 clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
2412 }
2413
2414 /* Wait for check parity and compute block operations to complete
2415 * before write-back
2416 */
2417 if (!test_bit(STRIPE_INSYNC, &sh->state) &&
2418 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
2419 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
2420 struct r5dev *dev;
2421 /* either failed parity check, or recovery is happening */
2422 if (s->failed == 0)
2423 s->failed_num = sh->pd_idx;
2424 dev = &sh->dev[s->failed_num];
2425 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
2426 BUG_ON(s->uptodate != disks);
2427
2428 set_bit(R5_LOCKED, &dev->flags);
2429 set_bit(R5_Wantwrite, &dev->flags);
2430 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2431 sh->ops.count++;
2432
2433 clear_bit(STRIPE_DEGRADED, &sh->state);
2434 s->locked++;
2435 set_bit(STRIPE_INSYNC, &sh->state);
2436 }
2437}
2438
2439
2440static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
2441 struct stripe_head_state *s,
2442 struct r6_state *r6s, struct page *tmp_page,
2443 int disks)
2444{
2445 int update_p = 0, update_q = 0;
2446 struct r5dev *dev;
2447 int pd_idx = sh->pd_idx;
2448 int qd_idx = r6s->qd_idx;
2449
2450 set_bit(STRIPE_HANDLE, &sh->state);
2451
2452 BUG_ON(s->failed > 2);
2453 BUG_ON(s->uptodate < disks);
2454 /* Want to check and possibly repair P and Q.
2455 * However there could be one 'failed' device, in which
2456 * case we can only check one of them, possibly using the
2457 * other to generate missing data
2458 */
2459
2460 /* If !tmp_page, we cannot do the calculations,
2461 * but as we have set STRIPE_HANDLE, we will soon be called
2462 * by stripe_handle with a tmp_page - just wait until then.
2463 */
2464 if (tmp_page) {
2465 if (s->failed == r6s->q_failed) {
2466 /* The only possible failed device holds 'Q', so it
2467 * makes sense to check P (If anything else were failed,
2468 * we would have used P to recreate it).
2469 */
2470 compute_block_1(sh, pd_idx, 1);
2471 if (!page_is_zero(sh->dev[pd_idx].page)) {
2472 compute_block_1(sh, pd_idx, 0);
2473 update_p = 1;
2474 }
2475 }
2476 if (!r6s->q_failed && s->failed < 2) {
2477 /* q is not failed, and we didn't use it to generate
2478 * anything, so it makes sense to check it
2479 */
2480 memcpy(page_address(tmp_page),
2481 page_address(sh->dev[qd_idx].page),
2482 STRIPE_SIZE);
2483 compute_parity6(sh, UPDATE_PARITY);
2484 if (memcmp(page_address(tmp_page),
2485 page_address(sh->dev[qd_idx].page),
2486 STRIPE_SIZE) != 0) {
2487 clear_bit(STRIPE_INSYNC, &sh->state);
2488 update_q = 1;
2489 }
2490 }
2491 if (update_p || update_q) {
2492 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2493 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2494 /* don't try to repair!! */
2495 update_p = update_q = 0;
2496 }
2497
2498 /* now write out any block on a failed drive,
2499 * or P or Q if they need it
2500 */
2501
2502 if (s->failed == 2) {
2503 dev = &sh->dev[r6s->failed_num[1]];
2504 s->locked++;
2505 set_bit(R5_LOCKED, &dev->flags);
2506 set_bit(R5_Wantwrite, &dev->flags);
2507 }
2508 if (s->failed >= 1) {
2509 dev = &sh->dev[r6s->failed_num[0]];
2510 s->locked++;
2511 set_bit(R5_LOCKED, &dev->flags);
2512 set_bit(R5_Wantwrite, &dev->flags);
2513 }
2514
2515 if (update_p) {
2516 dev = &sh->dev[pd_idx];
2517 s->locked++;
2518 set_bit(R5_LOCKED, &dev->flags);
2519 set_bit(R5_Wantwrite, &dev->flags);
2520 }
2521 if (update_q) {
2522 dev = &sh->dev[qd_idx];
2523 s->locked++;
2524 set_bit(R5_LOCKED, &dev->flags);
2525 set_bit(R5_Wantwrite, &dev->flags);
2526 }
2527 clear_bit(STRIPE_DEGRADED, &sh->state);
2528
2529 set_bit(STRIPE_INSYNC, &sh->state);
2530 }
2531}
2532
2533static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
2534 struct r6_state *r6s)
2535{
2536 int i;
2537
2538 /* We have read all the blocks in this stripe and now we need to
2539 * copy some of them into a target stripe for expand.
2540 */
2541 struct dma_async_tx_descriptor *tx = NULL;
2542 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2543 for (i = 0; i < sh->disks; i++)
2544 if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) {
2545 int dd_idx, pd_idx, j;
2546 struct stripe_head *sh2;
2547
2548 sector_t bn = compute_blocknr(sh, i);
2549 sector_t s = raid5_compute_sector(bn, conf->raid_disks,
2550 conf->raid_disks -
2551 conf->max_degraded, &dd_idx,
2552 &pd_idx, conf);
2553 sh2 = get_active_stripe(conf, s, conf->raid_disks,
2554 pd_idx, 1);
2555 if (sh2 == NULL)
2556 /* so far only the early blocks of this stripe
2557 * have been requested. When later blocks
2558 * get requested, we will try again
2559 */
2560 continue;
2561 if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
2562 test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
2563 /* must have already done this block */
2564 release_stripe(sh2);
2565 continue;
2566 }
2567
2568 /* place all the copies on one channel */
2569 tx = async_memcpy(sh2->dev[dd_idx].page,
2570 sh->dev[i].page, 0, 0, STRIPE_SIZE,
2571 ASYNC_TX_DEP_ACK, tx, NULL, NULL);
2572
2573 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
2574 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
2575 for (j = 0; j < conf->raid_disks; j++)
2576 if (j != sh2->pd_idx &&
2577 (r6s && j != r6s->qd_idx) &&
2578 !test_bit(R5_Expanded, &sh2->dev[j].flags))
2579 break;
2580 if (j == conf->raid_disks) {
2581 set_bit(STRIPE_EXPAND_READY, &sh2->state);
2582 set_bit(STRIPE_HANDLE, &sh2->state);
2583 }
2584 release_stripe(sh2);
2585
2586 /* done submitting copies, wait for them to complete */
2587 if (i + 1 >= sh->disks) {
2588 async_tx_ack(tx);
2589 dma_wait_for_async_tx(tx);
2590 }
2591 }
2592}
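Editor's note: handle_stripe_expansion() submits one async_memcpy() per copied block, chains each descriptor to the previous one with ASYNC_TX_DEP_ACK, and only acks and waits on the final descriptor once every copy has been queued. The stand-alone sketch below models that "chain everything, wait once at the end" pattern with an invented deferred-copy queue; submit_copy() and wait_for_last() are made-up stand-ins, not the dmaengine API.

#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for async_memcpy()/dma_wait_for_async_tx():
 * here a "descriptor" is just an index into a deferred-copy queue, and
 * "waiting" drains the queue.  Purely illustrative. */
#define MAX_COPIES 16

struct copy_req { void *dst; const void *src; size_t len; };

static struct copy_req queue[MAX_COPIES];
static int queued;

static int submit_copy(void *dst, const void *src, size_t len)
{
    queue[queued] = (struct copy_req){ dst, src, len };
    return queued++;            /* "descriptor" chained after prior ones */
}

static void wait_for_last(int last)
{
    int i;

    /* Completing the last descriptor implies all earlier ones ran,
     * which is the property the stripe-expansion loop relies on. */
    for (i = 0; i <= last; i++)
        memcpy(queue[i].dst, queue[i].src, queue[i].len);
    queued = 0;
}

int main(void)
{
    char src[4][8] = { "block0", "block1", "block2", "block3" };
    char dst[4][8];
    int i, last = -1;

    memset(dst, 0, sizeof(dst));

    for (i = 0; i < 4; i++)     /* submit every copy, keep chaining */
        last = submit_copy(dst[i], src[i], sizeof(src[i]));

    wait_for_last(last);        /* single wait after the last submit */

    for (i = 0; i < 4; i++)
        printf("%s\n", dst[i]);
    return 0;
}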
1329 2593
1330/* 2594/*
1331 * handle_stripe - do things to a stripe. 2595 * handle_stripe - do things to a stripe.
@@ -1339,81 +2603,70 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
1339 * schedule a write of some buffers 2603 * schedule a write of some buffers
1340 * return confirmation of parity correctness 2604 * return confirmation of parity correctness
1341 * 2605 *
1342 * Parity calculations are done inside the stripe lock
1343 * buffers are taken off read_list or write_list, and bh_cache buffers 2606 * buffers are taken off read_list or write_list, and bh_cache buffers
1344 * get BH_Lock set before the stripe lock is released. 2607 * get BH_Lock set before the stripe lock is released.
1345 * 2608 *
1346 */ 2609 */
1347 2610
1348static void handle_stripe5(struct stripe_head *sh) 2611static void handle_stripe5(struct stripe_head *sh)
1349{ 2612{
1350 raid5_conf_t *conf = sh->raid_conf; 2613 raid5_conf_t *conf = sh->raid_conf;
1351 int disks = sh->disks; 2614 int disks = sh->disks, i;
1352 struct bio *return_bi= NULL; 2615 struct bio *return_bi = NULL;
1353 struct bio *bi; 2616 struct stripe_head_state s;
1354 int i;
1355 int syncing, expanding, expanded;
1356 int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
1357 int non_overwrite = 0;
1358 int failed_num=0;
1359 struct r5dev *dev; 2617 struct r5dev *dev;
2618 unsigned long pending = 0;
1360 2619
1361 PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n", 2620 memset(&s, 0, sizeof(s));
1362 (unsigned long long)sh->sector, atomic_read(&sh->count), 2621 pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
1363 sh->pd_idx); 2622 "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
2623 atomic_read(&sh->count), sh->pd_idx,
2624 sh->ops.pending, sh->ops.ack, sh->ops.complete);
1364 2625
1365 spin_lock(&sh->lock); 2626 spin_lock(&sh->lock);
1366 clear_bit(STRIPE_HANDLE, &sh->state); 2627 clear_bit(STRIPE_HANDLE, &sh->state);
1367 clear_bit(STRIPE_DELAYED, &sh->state); 2628 clear_bit(STRIPE_DELAYED, &sh->state);
1368 2629
1369 syncing = test_bit(STRIPE_SYNCING, &sh->state); 2630 s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
1370 expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2631 s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
1371 expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 2632 s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
1372 /* Now to look around and see what can be done */ 2633 /* Now to look around and see what can be done */
1373 2634
1374 rcu_read_lock(); 2635 rcu_read_lock();
1375 for (i=disks; i--; ) { 2636 for (i=disks; i--; ) {
1376 mdk_rdev_t *rdev; 2637 mdk_rdev_t *rdev;
1377 dev = &sh->dev[i]; 2638 struct r5dev *dev = &sh->dev[i];
1378 clear_bit(R5_Insync, &dev->flags); 2639 clear_bit(R5_Insync, &dev->flags);
1379 2640
1380 PRINTK("check %d: state 0x%lx read %p write %p written %p\n", 2641 pr_debug("check %d: state 0x%lx toread %p read %p write %p "
1381 i, dev->flags, dev->toread, dev->towrite, dev->written); 2642 "written %p\n", i, dev->flags, dev->toread, dev->read,
1382 /* maybe we can reply to a read */ 2643 dev->towrite, dev->written);
1383 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
1384 struct bio *rbi, *rbi2;
1385 PRINTK("Return read for disc %d\n", i);
1386 spin_lock_irq(&conf->device_lock);
1387 rbi = dev->toread;
1388 dev->toread = NULL;
1389 if (test_and_clear_bit(R5_Overlap, &dev->flags))
1390 wake_up(&conf->wait_for_overlap);
1391 spin_unlock_irq(&conf->device_lock);
1392 while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
1393 copy_data(0, rbi, dev->page, dev->sector);
1394 rbi2 = r5_next_bio(rbi, dev->sector);
1395 spin_lock_irq(&conf->device_lock);
1396 if (--rbi->bi_phys_segments == 0) {
1397 rbi->bi_next = return_bi;
1398 return_bi = rbi;
1399 }
1400 spin_unlock_irq(&conf->device_lock);
1401 rbi = rbi2;
1402 }
1403 }
1404 2644
1405 /* now count some things */ 2645 /* maybe we can request a biofill operation
1406 if (test_bit(R5_LOCKED, &dev->flags)) locked++; 2646 *
1407 if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; 2647 * new wantfill requests are only permitted while
2648 * STRIPE_OP_BIOFILL is clear
2649 */
2650 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
2651 !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
2652 set_bit(R5_Wantfill, &dev->flags);
1408 2653
1409 2654 /* now count some things */
1410 if (dev->toread) to_read++; 2655 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
2656 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
2657 if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
2658
2659 if (test_bit(R5_Wantfill, &dev->flags))
2660 s.to_fill++;
2661 else if (dev->toread)
2662 s.to_read++;
1411 if (dev->towrite) { 2663 if (dev->towrite) {
1412 to_write++; 2664 s.to_write++;
1413 if (!test_bit(R5_OVERWRITE, &dev->flags)) 2665 if (!test_bit(R5_OVERWRITE, &dev->flags))
1414 non_overwrite++; 2666 s.non_overwrite++;
1415 } 2667 }
1416 if (dev->written) written++; 2668 if (dev->written)
2669 s.written++;
1417 rdev = rcu_dereference(conf->disks[i].rdev); 2670 rdev = rcu_dereference(conf->disks[i].rdev);
1418 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2671 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
1419 /* The ReadError flag will just be confusing now */ 2672 /* The ReadError flag will just be confusing now */
@@ -1422,306 +2675,131 @@ static void handle_stripe5(struct stripe_head *sh)
1422 } 2675 }
1423 if (!rdev || !test_bit(In_sync, &rdev->flags) 2676 if (!rdev || !test_bit(In_sync, &rdev->flags)
1424 || test_bit(R5_ReadError, &dev->flags)) { 2677 || test_bit(R5_ReadError, &dev->flags)) {
1425 failed++; 2678 s.failed++;
1426 failed_num = i; 2679 s.failed_num = i;
1427 } else 2680 } else
1428 set_bit(R5_Insync, &dev->flags); 2681 set_bit(R5_Insync, &dev->flags);
1429 } 2682 }
1430 rcu_read_unlock(); 2683 rcu_read_unlock();
1431 PRINTK("locked=%d uptodate=%d to_read=%d" 2684
2685 if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
2686 sh->ops.count++;
2687
2688 pr_debug("locked=%d uptodate=%d to_read=%d"
1432 " to_write=%d failed=%d failed_num=%d\n", 2689 " to_write=%d failed=%d failed_num=%d\n",
1433 locked, uptodate, to_read, to_write, failed, failed_num); 2690 s.locked, s.uptodate, s.to_read, s.to_write,
2691 s.failed, s.failed_num);
1434 /* check if the array has lost two devices and, if so, some requests might 2692 /* check if the array has lost two devices and, if so, some requests might
1435 * need to be failed 2693 * need to be failed
1436 */ 2694 */
1437 if (failed > 1 && to_read+to_write+written) { 2695 if (s.failed > 1 && s.to_read+s.to_write+s.written)
1438 for (i=disks; i--; ) { 2696 handle_requests_to_failed_array(conf, sh, &s, disks,
1439 int bitmap_end = 0; 2697 &return_bi);
1440 2698 if (s.failed > 1 && s.syncing) {
1441 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1442 mdk_rdev_t *rdev;
1443 rcu_read_lock();
1444 rdev = rcu_dereference(conf->disks[i].rdev);
1445 if (rdev && test_bit(In_sync, &rdev->flags))
1446 /* multiple read failures in one stripe */
1447 md_error(conf->mddev, rdev);
1448 rcu_read_unlock();
1449 }
1450
1451 spin_lock_irq(&conf->device_lock);
1452 /* fail all writes first */
1453 bi = sh->dev[i].towrite;
1454 sh->dev[i].towrite = NULL;
1455 if (bi) { to_write--; bitmap_end = 1; }
1456
1457 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1458 wake_up(&conf->wait_for_overlap);
1459
1460 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
1461 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1462 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1463 if (--bi->bi_phys_segments == 0) {
1464 md_write_end(conf->mddev);
1465 bi->bi_next = return_bi;
1466 return_bi = bi;
1467 }
1468 bi = nextbi;
1469 }
1470 /* and fail all 'written' */
1471 bi = sh->dev[i].written;
1472 sh->dev[i].written = NULL;
1473 if (bi) bitmap_end = 1;
1474 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
1475 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1476 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1477 if (--bi->bi_phys_segments == 0) {
1478 md_write_end(conf->mddev);
1479 bi->bi_next = return_bi;
1480 return_bi = bi;
1481 }
1482 bi = bi2;
1483 }
1484
1485 /* fail any reads if this device is non-operational */
1486 if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
1487 test_bit(R5_ReadError, &sh->dev[i].flags)) {
1488 bi = sh->dev[i].toread;
1489 sh->dev[i].toread = NULL;
1490 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
1491 wake_up(&conf->wait_for_overlap);
1492 if (bi) to_read--;
1493 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
1494 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1495 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1496 if (--bi->bi_phys_segments == 0) {
1497 bi->bi_next = return_bi;
1498 return_bi = bi;
1499 }
1500 bi = nextbi;
1501 }
1502 }
1503 spin_unlock_irq(&conf->device_lock);
1504 if (bitmap_end)
1505 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
1506 STRIPE_SECTORS, 0, 0);
1507 }
1508 }
1509 if (failed > 1 && syncing) {
1510 md_done_sync(conf->mddev, STRIPE_SECTORS,0); 2699 md_done_sync(conf->mddev, STRIPE_SECTORS,0);
1511 clear_bit(STRIPE_SYNCING, &sh->state); 2700 clear_bit(STRIPE_SYNCING, &sh->state);
1512 syncing = 0; 2701 s.syncing = 0;
1513 } 2702 }
1514 2703
1515 /* might be able to return some write requests if the parity block 2704 /* might be able to return some write requests if the parity block
1516 * is safe, or on a failed drive 2705 * is safe, or on a failed drive
1517 */ 2706 */
1518 dev = &sh->dev[sh->pd_idx]; 2707 dev = &sh->dev[sh->pd_idx];
1519 if ( written && 2708 if ( s.written &&
1520 ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && 2709 ((test_bit(R5_Insync, &dev->flags) &&
1521 test_bit(R5_UPTODATE, &dev->flags)) 2710 !test_bit(R5_LOCKED, &dev->flags) &&
1522 || (failed == 1 && failed_num == sh->pd_idx)) 2711 test_bit(R5_UPTODATE, &dev->flags)) ||
1523 ) { 2712 (s.failed == 1 && s.failed_num == sh->pd_idx)))
1524 /* any written block on an uptodate or failed drive can be returned. 2713 handle_completed_write_requests(conf, sh, disks, &return_bi);
1525 * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
1526 * never LOCKED, so we don't need to test 'failed' directly.
1527 */
1528 for (i=disks; i--; )
1529 if (sh->dev[i].written) {
1530 dev = &sh->dev[i];
1531 if (!test_bit(R5_LOCKED, &dev->flags) &&
1532 test_bit(R5_UPTODATE, &dev->flags) ) {
1533 /* We can return any write requests */
1534 struct bio *wbi, *wbi2;
1535 int bitmap_end = 0;
1536 PRINTK("Return write for disc %d\n", i);
1537 spin_lock_irq(&conf->device_lock);
1538 wbi = dev->written;
1539 dev->written = NULL;
1540 while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
1541 wbi2 = r5_next_bio(wbi, dev->sector);
1542 if (--wbi->bi_phys_segments == 0) {
1543 md_write_end(conf->mddev);
1544 wbi->bi_next = return_bi;
1545 return_bi = wbi;
1546 }
1547 wbi = wbi2;
1548 }
1549 if (dev->towrite == NULL)
1550 bitmap_end = 1;
1551 spin_unlock_irq(&conf->device_lock);
1552 if (bitmap_end)
1553 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
1554 STRIPE_SECTORS,
1555 !test_bit(STRIPE_DEGRADED, &sh->state), 0);
1556 }
1557 }
1558 }
1559 2714
1560 /* Now we might consider reading some blocks, either to check/generate 2715 /* Now we might consider reading some blocks, either to check/generate
1561 * parity, or to satisfy requests 2716 * parity, or to satisfy requests
1562 * or to load a block that is being partially written. 2717 * or to load a block that is being partially written.
1563 */ 2718 */
1564 if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) { 2719 if (s.to_read || s.non_overwrite ||
1565 for (i=disks; i--;) { 2720 (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
1566 dev = &sh->dev[i]; 2721 test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
1567 if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 2722 handle_issuing_new_read_requests5(sh, &s, disks);
1568 (dev->toread || 2723
1569 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || 2724 /* Now we check to see if any write operations have recently
1570 syncing || 2725 * completed
1571 expanding || 2726 */
1572 (failed && (sh->dev[failed_num].toread || 2727
1573 (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags)))) 2728 /* leave prexor set until postxor is done, allows us to distinguish
1574 ) 2729 * a rmw from a rcw during biodrain
1575 ) { 2730 */
1576 /* we would like to get this block, possibly 2731 if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
1577 * by computing it, but we might not be able to 2732 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
1578 */ 2733
1579 if (uptodate == disks-1) { 2734 clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
1580 PRINTK("Computing block %d\n", i); 2735 clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
1581 compute_block(sh, i); 2736 clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
1582 uptodate++; 2737
1583 } else if (test_bit(R5_Insync, &dev->flags)) { 2738 for (i = disks; i--; )
1584 set_bit(R5_LOCKED, &dev->flags); 2739 clear_bit(R5_Wantprexor, &sh->dev[i].flags);
1585 set_bit(R5_Wantread, &dev->flags);
1586 locked++;
1587 PRINTK("Reading block %d (sync=%d)\n",
1588 i, syncing);
1589 }
1590 }
1591 }
1592 set_bit(STRIPE_HANDLE, &sh->state);
1593 } 2740 }
1594 2741
1595 /* now to consider writing and what else, if anything should be read */ 2742 /* if only POSTXOR is set then this is an 'expand' postxor */
1596 if (to_write) { 2743 if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
1597 int rmw=0, rcw=0; 2744 test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
1598 for (i=disks ; i--;) { 2745
1599 /* would I have to read this buffer for read_modify_write */ 2746 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
2747 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
2748 clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
2749
2750 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
2751 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
2752 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
2753
2754 /* All the 'written' buffers and the parity block are ready to
2755 * be written back to disk
2756 */
2757 BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
2758 for (i = disks; i--; ) {
1600 dev = &sh->dev[i]; 2759 dev = &sh->dev[i];
1601 if ((dev->towrite || i == sh->pd_idx) && 2760 if (test_bit(R5_LOCKED, &dev->flags) &&
1602 (!test_bit(R5_LOCKED, &dev->flags) 2761 (i == sh->pd_idx || dev->written)) {
1603 ) && 2762 pr_debug("Writing block %d\n", i);
1604 !test_bit(R5_UPTODATE, &dev->flags)) { 2763 set_bit(R5_Wantwrite, &dev->flags);
1605 if (test_bit(R5_Insync, &dev->flags) 2764 if (!test_and_set_bit(
1606/* && !(!mddev->insync && i == sh->pd_idx) */ 2765 STRIPE_OP_IO, &sh->ops.pending))
1607 ) 2766 sh->ops.count++;
1608 rmw++; 2767 if (!test_bit(R5_Insync, &dev->flags) ||
1609 else rmw += 2*disks; /* cannot read it */ 2768 (i == sh->pd_idx && s.failed == 0))
1610 } 2769 set_bit(STRIPE_INSYNC, &sh->state);
1611 /* Would I have to read this buffer for reconstruct_write */
1612 if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
1613 (!test_bit(R5_LOCKED, &dev->flags)
1614 ) &&
1615 !test_bit(R5_UPTODATE, &dev->flags)) {
1616 if (test_bit(R5_Insync, &dev->flags)) rcw++;
1617 else rcw += 2*disks;
1618 } 2770 }
1619 } 2771 }
1620 PRINTK("for sector %llu, rmw=%d rcw=%d\n", 2772 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
1621 (unsigned long long)sh->sector, rmw, rcw); 2773 atomic_dec(&conf->preread_active_stripes);
1622 set_bit(STRIPE_HANDLE, &sh->state); 2774 if (atomic_read(&conf->preread_active_stripes) <
1623 if (rmw < rcw && rmw > 0) 2775 IO_THRESHOLD)
1624 /* prefer read-modify-write, but need to get some data */ 2776 md_wakeup_thread(conf->mddev->thread);
1625 for (i=disks; i--;) {
1626 dev = &sh->dev[i];
1627 if ((dev->towrite || i == sh->pd_idx) &&
1628 !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
1629 test_bit(R5_Insync, &dev->flags)) {
1630 if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
1631 {
1632 PRINTK("Read_old block %d for r-m-w\n", i);
1633 set_bit(R5_LOCKED, &dev->flags);
1634 set_bit(R5_Wantread, &dev->flags);
1635 locked++;
1636 } else {
1637 set_bit(STRIPE_DELAYED, &sh->state);
1638 set_bit(STRIPE_HANDLE, &sh->state);
1639 }
1640 }
1641 }
1642 if (rcw <= rmw && rcw > 0)
1643 /* want reconstruct write, but need to get some data */
1644 for (i=disks; i--;) {
1645 dev = &sh->dev[i];
1646 if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
1647 !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
1648 test_bit(R5_Insync, &dev->flags)) {
1649 if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
1650 {
1651 PRINTK("Read_old block %d for Reconstruct\n", i);
1652 set_bit(R5_LOCKED, &dev->flags);
1653 set_bit(R5_Wantread, &dev->flags);
1654 locked++;
1655 } else {
1656 set_bit(STRIPE_DELAYED, &sh->state);
1657 set_bit(STRIPE_HANDLE, &sh->state);
1658 }
1659 }
1660 }
1661 /* now if nothing is locked, and if we have enough data, we can start a write request */
1662 if (locked == 0 && (rcw == 0 ||rmw == 0) &&
1663 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
1664 PRINTK("Computing parity...\n");
1665 compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
1666 /* now every locked buffer is ready to be written */
1667 for (i=disks; i--;)
1668 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
1669 PRINTK("Writing block %d\n", i);
1670 locked++;
1671 set_bit(R5_Wantwrite, &sh->dev[i].flags);
1672 if (!test_bit(R5_Insync, &sh->dev[i].flags)
1673 || (i==sh->pd_idx && failed == 0))
1674 set_bit(STRIPE_INSYNC, &sh->state);
1675 }
1676 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
1677 atomic_dec(&conf->preread_active_stripes);
1678 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
1679 md_wakeup_thread(conf->mddev->thread);
1680 }
1681 } 2777 }
1682 } 2778 }
1683 2779
1684 /* maybe we need to check and possibly fix the parity for this stripe 2780 /* Now to consider new write requests and what else, if anything
1685 * Any reads will already have been scheduled, so we just see if enough data 2781 * should be read. We do not handle new writes when:
1686 * is available 2782 * 1/ A 'write' operation (copy+xor) is already in flight.
2783 * 2/ A 'check' operation is in flight, as it may clobber the parity
2784 * block.
1687 */ 2785 */
1688 if (syncing && locked == 0 && 2786 if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
1689 !test_bit(STRIPE_INSYNC, &sh->state)) { 2787 !test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
1690 set_bit(STRIPE_HANDLE, &sh->state); 2788 handle_issuing_new_write_requests5(conf, sh, &s, disks);
1691 if (failed == 0) {
1692 BUG_ON(uptodate != disks);
1693 compute_parity5(sh, CHECK_PARITY);
1694 uptodate--;
1695 if (page_is_zero(sh->dev[sh->pd_idx].page)) {
1696 /* parity is correct (on disc, not in buffer any more) */
1697 set_bit(STRIPE_INSYNC, &sh->state);
1698 } else {
1699 conf->mddev->resync_mismatches += STRIPE_SECTORS;
1700 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
1701 /* don't try to repair!! */
1702 set_bit(STRIPE_INSYNC, &sh->state);
1703 else {
1704 compute_block(sh, sh->pd_idx);
1705 uptodate++;
1706 }
1707 }
1708 }
1709 if (!test_bit(STRIPE_INSYNC, &sh->state)) {
1710 /* either failed parity check, or recovery is happening */
1711 if (failed==0)
1712 failed_num = sh->pd_idx;
1713 dev = &sh->dev[failed_num];
1714 BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
1715 BUG_ON(uptodate != disks);
1716 2789
1717 set_bit(R5_LOCKED, &dev->flags); 2790 /* maybe we need to check and possibly fix the parity for this stripe
1718 set_bit(R5_Wantwrite, &dev->flags); 2791 * Any reads will already have been scheduled, so we just see if enough
1719 clear_bit(STRIPE_DEGRADED, &sh->state); 2792 * data is available. The parity check is held off while parity
1720 locked++; 2793 * dependent operations are in flight.
1721 set_bit(STRIPE_INSYNC, &sh->state); 2794 */
1722 } 2795 if ((s.syncing && s.locked == 0 &&
1723 } 2796 !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
1724 if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 2797 !test_bit(STRIPE_INSYNC, &sh->state)) ||
2798 test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
2799 test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending))
2800 handle_parity_checks5(conf, sh, &s, disks);
2801
2802 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
1725 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 2803 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
1726 clear_bit(STRIPE_SYNCING, &sh->state); 2804 clear_bit(STRIPE_SYNCING, &sh->state);
1727 } 2805 }
@@ -1729,186 +2807,102 @@ static void handle_stripe5(struct stripe_head *sh)
1729 /* If the failed drive is just a ReadError, then we might need to progress 2807 /* If the failed drive is just a ReadError, then we might need to progress
1730 * the repair/check process 2808 * the repair/check process
1731 */ 2809 */
1732 if (failed == 1 && ! conf->mddev->ro && 2810 if (s.failed == 1 && !conf->mddev->ro &&
1733 test_bit(R5_ReadError, &sh->dev[failed_num].flags) 2811 test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
1734 && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) 2812 && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
1735 && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) 2813 && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
1736 ) { 2814 ) {
1737 dev = &sh->dev[failed_num]; 2815 dev = &sh->dev[s.failed_num];
1738 if (!test_bit(R5_ReWrite, &dev->flags)) { 2816 if (!test_bit(R5_ReWrite, &dev->flags)) {
1739 set_bit(R5_Wantwrite, &dev->flags); 2817 set_bit(R5_Wantwrite, &dev->flags);
2818 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2819 sh->ops.count++;
1740 set_bit(R5_ReWrite, &dev->flags); 2820 set_bit(R5_ReWrite, &dev->flags);
1741 set_bit(R5_LOCKED, &dev->flags); 2821 set_bit(R5_LOCKED, &dev->flags);
1742 locked++; 2822 s.locked++;
1743 } else { 2823 } else {
1744 /* let's read it back */ 2824 /* let's read it back */
1745 set_bit(R5_Wantread, &dev->flags); 2825 set_bit(R5_Wantread, &dev->flags);
2826 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2827 sh->ops.count++;
1746 set_bit(R5_LOCKED, &dev->flags); 2828 set_bit(R5_LOCKED, &dev->flags);
1747 locked++; 2829 s.locked++;
1748 } 2830 }
1749 } 2831 }
1750 2832
1751 if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 2833 /* Finish postxor operations initiated by the expansion
1752 /* Need to write out all blocks after computing parity */ 2834 * process
1753 sh->disks = conf->raid_disks; 2835 */
1754 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); 2836 if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
1755 compute_parity5(sh, RECONSTRUCT_WRITE); 2837 !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
1756 for (i= conf->raid_disks; i--;) { 2838
1757 set_bit(R5_LOCKED, &sh->dev[i].flags); 2839 clear_bit(STRIPE_EXPANDING, &sh->state);
1758 locked++; 2840
2841 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
2842 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
2843 clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
2844
2845 for (i = conf->raid_disks; i--; ) {
1759 set_bit(R5_Wantwrite, &sh->dev[i].flags); 2846 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2847 if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
2848 sh->ops.count++;
1760 } 2849 }
1761 clear_bit(STRIPE_EXPANDING, &sh->state); 2850 }
1762 } else if (expanded) { 2851
2852 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
2853 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
2854 /* Need to write out all blocks after computing parity */
2855 sh->disks = conf->raid_disks;
2856 sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
2857 conf->raid_disks);
2858 s.locked += handle_write_operations5(sh, 0, 1);
2859 } else if (s.expanded &&
2860 !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
1763 clear_bit(STRIPE_EXPAND_READY, &sh->state); 2861 clear_bit(STRIPE_EXPAND_READY, &sh->state);
1764 atomic_dec(&conf->reshape_stripes); 2862 atomic_dec(&conf->reshape_stripes);
1765 wake_up(&conf->wait_for_overlap); 2863 wake_up(&conf->wait_for_overlap);
1766 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 2864 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
1767 } 2865 }
1768 2866
1769 if (expanding && locked == 0) { 2867 if (s.expanding && s.locked == 0)
1770 /* We have read all the blocks in this stripe and now we need to 2868 handle_stripe_expansion(conf, sh, NULL);
1771 * copy some of them into a target stripe for expand. 2869
1772 */ 2870 if (sh->ops.count)
1773 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2871 pending = get_stripe_work(sh);
1774 for (i=0; i< sh->disks; i++)
1775 if (i != sh->pd_idx) {
1776 int dd_idx, pd_idx, j;
1777 struct stripe_head *sh2;
1778
1779 sector_t bn = compute_blocknr(sh, i);
1780 sector_t s = raid5_compute_sector(bn, conf->raid_disks,
1781 conf->raid_disks-1,
1782 &dd_idx, &pd_idx, conf);
1783 sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1);
1784 if (sh2 == NULL)
1785 /* so far only the early blocks of this stripe
1786 * have been requested. When later blocks
1787 * get requested, we will try again
1788 */
1789 continue;
1790 if(!test_bit(STRIPE_EXPANDING, &sh2->state) ||
1791 test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
1792 /* must have already done this block */
1793 release_stripe(sh2);
1794 continue;
1795 }
1796 memcpy(page_address(sh2->dev[dd_idx].page),
1797 page_address(sh->dev[i].page),
1798 STRIPE_SIZE);
1799 set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
1800 set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
1801 for (j=0; j<conf->raid_disks; j++)
1802 if (j != sh2->pd_idx &&
1803 !test_bit(R5_Expanded, &sh2->dev[j].flags))
1804 break;
1805 if (j == conf->raid_disks) {
1806 set_bit(STRIPE_EXPAND_READY, &sh2->state);
1807 set_bit(STRIPE_HANDLE, &sh2->state);
1808 }
1809 release_stripe(sh2);
1810 }
1811 }
1812 2872
1813 spin_unlock(&sh->lock); 2873 spin_unlock(&sh->lock);
1814 2874
1815 while ((bi=return_bi)) { 2875 if (pending)
1816 int bytes = bi->bi_size; 2876 raid5_run_ops(sh, pending);
1817 2877
1818 return_bi = bi->bi_next; 2878 return_io(return_bi);
1819 bi->bi_next = NULL;
1820 bi->bi_size = 0;
1821 bi->bi_end_io(bi, bytes,
1822 test_bit(BIO_UPTODATE, &bi->bi_flags)
1823 ? 0 : -EIO);
1824 }
1825 for (i=disks; i-- ;) {
1826 int rw;
1827 struct bio *bi;
1828 mdk_rdev_t *rdev;
1829 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
1830 rw = WRITE;
1831 else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
1832 rw = READ;
1833 else
1834 continue;
1835
1836 bi = &sh->dev[i].req;
1837
1838 bi->bi_rw = rw;
1839 if (rw == WRITE)
1840 bi->bi_end_io = raid5_end_write_request;
1841 else
1842 bi->bi_end_io = raid5_end_read_request;
1843
1844 rcu_read_lock();
1845 rdev = rcu_dereference(conf->disks[i].rdev);
1846 if (rdev && test_bit(Faulty, &rdev->flags))
1847 rdev = NULL;
1848 if (rdev)
1849 atomic_inc(&rdev->nr_pending);
1850 rcu_read_unlock();
1851
1852 if (rdev) {
1853 if (syncing || expanding || expanded)
1854 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
1855 2879
1856 bi->bi_bdev = rdev->bdev;
1857 PRINTK("for %llu schedule op %ld on disc %d\n",
1858 (unsigned long long)sh->sector, bi->bi_rw, i);
1859 atomic_inc(&sh->count);
1860 bi->bi_sector = sh->sector + rdev->data_offset;
1861 bi->bi_flags = 1 << BIO_UPTODATE;
1862 bi->bi_vcnt = 1;
1863 bi->bi_max_vecs = 1;
1864 bi->bi_idx = 0;
1865 bi->bi_io_vec = &sh->dev[i].vec;
1866 bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
1867 bi->bi_io_vec[0].bv_offset = 0;
1868 bi->bi_size = STRIPE_SIZE;
1869 bi->bi_next = NULL;
1870 if (rw == WRITE &&
1871 test_bit(R5_ReWrite, &sh->dev[i].flags))
1872 atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
1873 generic_make_request(bi);
1874 } else {
1875 if (rw == WRITE)
1876 set_bit(STRIPE_DEGRADED, &sh->state);
1877 PRINTK("skip op %ld on disc %d for sector %llu\n",
1878 bi->bi_rw, i, (unsigned long long)sh->sector);
1879 clear_bit(R5_LOCKED, &sh->dev[i].flags);
1880 set_bit(STRIPE_HANDLE, &sh->state);
1881 }
1882 }
1883} 2880}
1884 2881
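Editor's note: after this rework handle_stripe5() no longer performs parity math under the lock; it records requested work in sh->ops.pending, hands it to raid5_run_ops() via get_stripe_work(), and reaps finished operations from sh->ops.complete on a later pass. The stand-alone sketch below models that pending/ack/complete handshake with made-up operation numbers and plain bitmasks; the real code uses the STRIPE_OP_* flags and atomic bitops under sh->lock, so treat this only as an illustration of the bookkeeping.

#include <stdio.h>

/* Simplified model of the sh->ops.{pending,ack,complete} handshake.
 * Bit numbers are invented for this sketch. */
enum { OP_BIOFILL, OP_COMPUTE, OP_POSTXOR, OP_IO };

struct ops_state {
    unsigned long pending;   /* requested by the handle path            */
    unsigned long ack;       /* picked up by the run_ops path           */
    unsigned long complete;  /* finished, waiting to be reaped          */
    int count;               /* requests not yet handed to run_ops      */
};

static void request_op(struct ops_state *s, int op)
{
    if (!(s->pending & (1UL << op))) {   /* test_and_set_bit() analogue */
        s->pending |= 1UL << op;
        s->count++;
    }
}

/* get_stripe_work() analogue: take everything pending but not yet acked. */
static unsigned long take_work(struct ops_state *s)
{
    unsigned long work = s->pending & ~s->ack;

    s->ack |= work;
    s->count = 0;
    return work;
}

/* When an operation finishes, mark it complete; a later handle pass
 * clears all three bits together before the slot is reused. */
static void finish_op(struct ops_state *s, int op)
{
    s->complete |= 1UL << op;
}

static void reap_op(struct ops_state *s, int op)
{
    unsigned long bit = 1UL << op;

    s->pending &= ~bit;
    s->ack &= ~bit;
    s->complete &= ~bit;
}

int main(void)
{
    struct ops_state s = { 0, 0, 0, 0 };

    request_op(&s, OP_POSTXOR);
    request_op(&s, OP_IO);
    printf("work to run: %#lx\n", take_work(&s));
    finish_op(&s, OP_POSTXOR);
    reap_op(&s, OP_POSTXOR);
    printf("pending=%#lx ack=%#lx complete=%#lx\n",
           s.pending, s.ack, s.complete);
    return 0;
}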
1885static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 2882static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
1886{ 2883{
1887 raid6_conf_t *conf = sh->raid_conf; 2884 raid6_conf_t *conf = sh->raid_conf;
1888 int disks = sh->disks; 2885 int disks = sh->disks;
1889 struct bio *return_bi= NULL; 2886 struct bio *return_bi = NULL;
1890 struct bio *bi; 2887 int i, pd_idx = sh->pd_idx;
1891 int i; 2888 struct stripe_head_state s;
1892 int syncing, expanding, expanded; 2889 struct r6_state r6s;
1893 int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
1894 int non_overwrite = 0;
1895 int failed_num[2] = {0, 0};
1896 struct r5dev *dev, *pdev, *qdev; 2890 struct r5dev *dev, *pdev, *qdev;
1897 int pd_idx = sh->pd_idx;
1898 int qd_idx = raid6_next_disk(pd_idx, disks);
1899 int p_failed, q_failed;
1900 2891
1901 PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n", 2892 r6s.qd_idx = raid6_next_disk(pd_idx, disks);
1902 (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count), 2893 pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
1903 pd_idx, qd_idx); 2894 "pd_idx=%d, qd_idx=%d\n",
2895 (unsigned long long)sh->sector, sh->state,
2896 atomic_read(&sh->count), pd_idx, r6s.qd_idx);
2897 memset(&s, 0, sizeof(s));
1904 2898
1905 spin_lock(&sh->lock); 2899 spin_lock(&sh->lock);
1906 clear_bit(STRIPE_HANDLE, &sh->state); 2900 clear_bit(STRIPE_HANDLE, &sh->state);
1907 clear_bit(STRIPE_DELAYED, &sh->state); 2901 clear_bit(STRIPE_DELAYED, &sh->state);
1908 2902
1909 syncing = test_bit(STRIPE_SYNCING, &sh->state); 2903 s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
1910 expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2904 s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
1911 expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 2905 s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
1912 /* Now to look around and see what can be done */ 2906 /* Now to look around and see what can be done */
1913 2907
1914 rcu_read_lock(); 2908 rcu_read_lock();
@@ -1917,12 +2911,12 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
1917 dev = &sh->dev[i]; 2911 dev = &sh->dev[i];
1918 clear_bit(R5_Insync, &dev->flags); 2912 clear_bit(R5_Insync, &dev->flags);
1919 2913
1920 PRINTK("check %d: state 0x%lx read %p write %p written %p\n", 2914 pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
1921 i, dev->flags, dev->toread, dev->towrite, dev->written); 2915 i, dev->flags, dev->toread, dev->towrite, dev->written);
1922 /* maybe we can reply to a read */ 2916 /* maybe we can reply to a read */
1923 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 2917 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
1924 struct bio *rbi, *rbi2; 2918 struct bio *rbi, *rbi2;
1925 PRINTK("Return read for disc %d\n", i); 2919 pr_debug("Return read for disc %d\n", i);
1926 spin_lock_irq(&conf->device_lock); 2920 spin_lock_irq(&conf->device_lock);
1927 rbi = dev->toread; 2921 rbi = dev->toread;
1928 dev->toread = NULL; 2922 dev->toread = NULL;
@@ -1943,17 +2937,19 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
1943 } 2937 }
1944 2938
1945 /* now count some things */ 2939 /* now count some things */
1946 if (test_bit(R5_LOCKED, &dev->flags)) locked++; 2940 if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
1947 if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; 2941 if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
1948 2942
1949 2943
1950 if (dev->toread) to_read++; 2944 if (dev->toread)
2945 s.to_read++;
1951 if (dev->towrite) { 2946 if (dev->towrite) {
1952 to_write++; 2947 s.to_write++;
1953 if (!test_bit(R5_OVERWRITE, &dev->flags)) 2948 if (!test_bit(R5_OVERWRITE, &dev->flags))
1954 non_overwrite++; 2949 s.non_overwrite++;
1955 } 2950 }
1956 if (dev->written) written++; 2951 if (dev->written)
2952 s.written++;
1957 rdev = rcu_dereference(conf->disks[i].rdev); 2953 rdev = rcu_dereference(conf->disks[i].rdev);
1958 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2954 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
1959 /* The ReadError flag will just be confusing now */ 2955 /* The ReadError flag will just be confusing now */
@@ -1962,96 +2958,27 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
1962 } 2958 }
1963 if (!rdev || !test_bit(In_sync, &rdev->flags) 2959 if (!rdev || !test_bit(In_sync, &rdev->flags)
1964 || test_bit(R5_ReadError, &dev->flags)) { 2960 || test_bit(R5_ReadError, &dev->flags)) {
1965 if ( failed < 2 ) 2961 if (s.failed < 2)
1966 failed_num[failed] = i; 2962 r6s.failed_num[s.failed] = i;
1967 failed++; 2963 s.failed++;
1968 } else 2964 } else
1969 set_bit(R5_Insync, &dev->flags); 2965 set_bit(R5_Insync, &dev->flags);
1970 } 2966 }
1971 rcu_read_unlock(); 2967 rcu_read_unlock();
1972 PRINTK("locked=%d uptodate=%d to_read=%d" 2968 pr_debug("locked=%d uptodate=%d to_read=%d"
1973 " to_write=%d failed=%d failed_num=%d,%d\n", 2969 " to_write=%d failed=%d failed_num=%d,%d\n",
1974 locked, uptodate, to_read, to_write, failed, 2970 s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
1975 failed_num[0], failed_num[1]); 2971 r6s.failed_num[0], r6s.failed_num[1]);
1976 /* check if the array has lost >2 devices and, if so, some requests might 2972 /* check if the array has lost >2 devices and, if so, some requests
1977 * need to be failed 2973 * might need to be failed
1978 */ 2974 */
1979 if (failed > 2 && to_read+to_write+written) { 2975 if (s.failed > 2 && s.to_read+s.to_write+s.written)
1980 for (i=disks; i--; ) { 2976 handle_requests_to_failed_array(conf, sh, &s, disks,
1981 int bitmap_end = 0; 2977 &return_bi);
1982 2978 if (s.failed > 2 && s.syncing) {
1983 if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
1984 mdk_rdev_t *rdev;
1985 rcu_read_lock();
1986 rdev = rcu_dereference(conf->disks[i].rdev);
1987 if (rdev && test_bit(In_sync, &rdev->flags))
1988 /* multiple read failures in one stripe */
1989 md_error(conf->mddev, rdev);
1990 rcu_read_unlock();
1991 }
1992
1993 spin_lock_irq(&conf->device_lock);
1994 /* fail all writes first */
1995 bi = sh->dev[i].towrite;
1996 sh->dev[i].towrite = NULL;
1997 if (bi) { to_write--; bitmap_end = 1; }
1998
1999 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2000 wake_up(&conf->wait_for_overlap);
2001
2002 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
2003 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2004 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2005 if (--bi->bi_phys_segments == 0) {
2006 md_write_end(conf->mddev);
2007 bi->bi_next = return_bi;
2008 return_bi = bi;
2009 }
2010 bi = nextbi;
2011 }
2012 /* and fail all 'written' */
2013 bi = sh->dev[i].written;
2014 sh->dev[i].written = NULL;
2015 if (bi) bitmap_end = 1;
2016 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
2017 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
2018 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2019 if (--bi->bi_phys_segments == 0) {
2020 md_write_end(conf->mddev);
2021 bi->bi_next = return_bi;
2022 return_bi = bi;
2023 }
2024 bi = bi2;
2025 }
2026
2027 /* fail any reads if this device is non-operational */
2028 if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
2029 test_bit(R5_ReadError, &sh->dev[i].flags)) {
2030 bi = sh->dev[i].toread;
2031 sh->dev[i].toread = NULL;
2032 if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
2033 wake_up(&conf->wait_for_overlap);
2034 if (bi) to_read--;
2035 while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
2036 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
2037 clear_bit(BIO_UPTODATE, &bi->bi_flags);
2038 if (--bi->bi_phys_segments == 0) {
2039 bi->bi_next = return_bi;
2040 return_bi = bi;
2041 }
2042 bi = nextbi;
2043 }
2044 }
2045 spin_unlock_irq(&conf->device_lock);
2046 if (bitmap_end)
2047 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2048 STRIPE_SECTORS, 0, 0);
2049 }
2050 }
2051 if (failed > 2 && syncing) {
2052 md_done_sync(conf->mddev, STRIPE_SECTORS,0); 2979 md_done_sync(conf->mddev, STRIPE_SECTORS,0);
2053 clear_bit(STRIPE_SYNCING, &sh->state); 2980 clear_bit(STRIPE_SYNCING, &sh->state);
2054 syncing = 0; 2981 s.syncing = 0;
2055 } 2982 }
2056 2983
2057 /* 2984 /*
@@ -2059,279 +2986,41 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2059 * are safe, or on a failed drive 2986 * are safe, or on a failed drive
2060 */ 2987 */
2061 pdev = &sh->dev[pd_idx]; 2988 pdev = &sh->dev[pd_idx];
2062 p_failed = (failed >= 1 && failed_num[0] == pd_idx) 2989 r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
2063 || (failed >= 2 && failed_num[1] == pd_idx); 2990 || (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
2064 qdev = &sh->dev[qd_idx]; 2991 qdev = &sh->dev[r6s.qd_idx];
2065 q_failed = (failed >= 1 && failed_num[0] == qd_idx) 2992 r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
2066 || (failed >= 2 && failed_num[1] == qd_idx); 2993 || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
2067 2994
2068 if ( written && 2995 if ( s.written &&
2069 ( p_failed || ((test_bit(R5_Insync, &pdev->flags) 2996 ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
2070 && !test_bit(R5_LOCKED, &pdev->flags) 2997 && !test_bit(R5_LOCKED, &pdev->flags)
2071 && test_bit(R5_UPTODATE, &pdev->flags))) ) && 2998 && test_bit(R5_UPTODATE, &pdev->flags)))) &&
2072 ( q_failed || ((test_bit(R5_Insync, &qdev->flags) 2999 ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
2073 && !test_bit(R5_LOCKED, &qdev->flags) 3000 && !test_bit(R5_LOCKED, &qdev->flags)
2074 && test_bit(R5_UPTODATE, &qdev->flags))) ) ) { 3001 && test_bit(R5_UPTODATE, &qdev->flags)))))
2075 /* any written block on an uptodate or failed drive can be 3002 handle_completed_write_requests(conf, sh, disks, &return_bi);
2076 * returned. Note that if we 'wrote' to a failed drive,
2077 * it will be UPTODATE, but never LOCKED, so we don't need
2078 * to test 'failed' directly.
2079 */
2080 for (i=disks; i--; )
2081 if (sh->dev[i].written) {
2082 dev = &sh->dev[i];
2083 if (!test_bit(R5_LOCKED, &dev->flags) &&
2084 test_bit(R5_UPTODATE, &dev->flags) ) {
2085 /* We can return any write requests */
2086 int bitmap_end = 0;
2087 struct bio *wbi, *wbi2;
2088 PRINTK("Return write for stripe %llu disc %d\n",
2089 (unsigned long long)sh->sector, i);
2090 spin_lock_irq(&conf->device_lock);
2091 wbi = dev->written;
2092 dev->written = NULL;
2093 while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
2094 wbi2 = r5_next_bio(wbi, dev->sector);
2095 if (--wbi->bi_phys_segments == 0) {
2096 md_write_end(conf->mddev);
2097 wbi->bi_next = return_bi;
2098 return_bi = wbi;
2099 }
2100 wbi = wbi2;
2101 }
2102 if (dev->towrite == NULL)
2103 bitmap_end = 1;
2104 spin_unlock_irq(&conf->device_lock);
2105 if (bitmap_end)
2106 bitmap_endwrite(conf->mddev->bitmap, sh->sector,
2107 STRIPE_SECTORS,
2108 !test_bit(STRIPE_DEGRADED, &sh->state), 0);
2109 }
2110 }
2111 }
2112 3003
2113 /* Now we might consider reading some blocks, either to check/generate 3004 /* Now we might consider reading some blocks, either to check/generate
2114 * parity, or to satisfy requests 3005 * parity, or to satisfy requests
2115 * or to load a block that is being partially written. 3006 * or to load a block that is being partially written.
2116 */ 3007 */
2117 if (to_read || non_overwrite || (to_write && failed) || 3008 if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
2118 (syncing && (uptodate < disks)) || expanding) { 3009 (s.syncing && (s.uptodate < disks)) || s.expanding)
2119 for (i=disks; i--;) { 3010 handle_issuing_new_read_requests6(sh, &s, &r6s, disks);
2120 dev = &sh->dev[i];
2121 if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
2122 (dev->toread ||
2123 (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
2124 syncing ||
2125 expanding ||
2126 (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
2127 (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
2128 )
2129 ) {
2130 /* we would like to get this block, possibly
2131 * by computing it, but we might not be able to
2132 */
2133 if (uptodate == disks-1) {
2134 PRINTK("Computing stripe %llu block %d\n",
2135 (unsigned long long)sh->sector, i);
2136 compute_block_1(sh, i, 0);
2137 uptodate++;
2138 } else if ( uptodate == disks-2 && failed >= 2 ) {
2139 /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
2140 int other;
2141 for (other=disks; other--;) {
2142 if ( other == i )
2143 continue;
2144 if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) )
2145 break;
2146 }
2147 BUG_ON(other < 0);
2148 PRINTK("Computing stripe %llu blocks %d,%d\n",
2149 (unsigned long long)sh->sector, i, other);
2150 compute_block_2(sh, i, other);
2151 uptodate += 2;
2152 } else if (test_bit(R5_Insync, &dev->flags)) {
2153 set_bit(R5_LOCKED, &dev->flags);
2154 set_bit(R5_Wantread, &dev->flags);
2155 locked++;
2156 PRINTK("Reading block %d (sync=%d)\n",
2157 i, syncing);
2158 }
2159 }
2160 }
2161 set_bit(STRIPE_HANDLE, &sh->state);
2162 }
2163 3011
2164 /* now to consider writing and what else, if anything should be read */ 3012 /* now to consider writing and what else, if anything should be read */
2165 if (to_write) { 3013 if (s.to_write)
2166 int rcw=0, must_compute=0; 3014 handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks);
2167 for (i=disks ; i--;) {
2168 dev = &sh->dev[i];
2169 /* Would I have to read this buffer for reconstruct_write */
2170 if (!test_bit(R5_OVERWRITE, &dev->flags)
2171 && i != pd_idx && i != qd_idx
2172 && (!test_bit(R5_LOCKED, &dev->flags)
2173 ) &&
2174 !test_bit(R5_UPTODATE, &dev->flags)) {
2175 if (test_bit(R5_Insync, &dev->flags)) rcw++;
2176 else {
2177 PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags);
2178 must_compute++;
2179 }
2180 }
2181 }
2182 PRINTK("for sector %llu, rcw=%d, must_compute=%d\n",
2183 (unsigned long long)sh->sector, rcw, must_compute);
2184 set_bit(STRIPE_HANDLE, &sh->state);
2185
2186 if (rcw > 0)
2187 /* want reconstruct write, but need to get some data */
2188 for (i=disks; i--;) {
2189 dev = &sh->dev[i];
2190 if (!test_bit(R5_OVERWRITE, &dev->flags)
2191 && !(failed == 0 && (i == pd_idx || i == qd_idx))
2192 && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
2193 test_bit(R5_Insync, &dev->flags)) {
2194 if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
2195 {
2196 PRINTK("Read_old stripe %llu block %d for Reconstruct\n",
2197 (unsigned long long)sh->sector, i);
2198 set_bit(R5_LOCKED, &dev->flags);
2199 set_bit(R5_Wantread, &dev->flags);
2200 locked++;
2201 } else {
2202 PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
2203 (unsigned long long)sh->sector, i);
2204 set_bit(STRIPE_DELAYED, &sh->state);
2205 set_bit(STRIPE_HANDLE, &sh->state);
2206 }
2207 }
2208 }
2209 /* now if nothing is locked, and if we have enough data, we can start a write request */
2210 if (locked == 0 && rcw == 0 &&
2211 !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
2212 if ( must_compute > 0 ) {
2213 /* We have failed blocks and need to compute them */
2214 switch ( failed ) {
2215 case 0: BUG();
2216 case 1: compute_block_1(sh, failed_num[0], 0); break;
2217 case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
2218 default: BUG(); /* This request should have been failed? */
2219 }
2220 }
2221
2222 PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector);
2223 compute_parity6(sh, RECONSTRUCT_WRITE);
2224 /* now every locked buffer is ready to be written */
2225 for (i=disks; i--;)
2226 if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
2227 PRINTK("Writing stripe %llu block %d\n",
2228 (unsigned long long)sh->sector, i);
2229 locked++;
2230 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2231 }
2232 /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
2233 set_bit(STRIPE_INSYNC, &sh->state);
2234
2235 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
2236 atomic_dec(&conf->preread_active_stripes);
2237 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
2238 md_wakeup_thread(conf->mddev->thread);
2239 }
2240 }
2241 }
2242 3015
2243 /* maybe we need to check and possibly fix the parity for this stripe 3016 /* maybe we need to check and possibly fix the parity for this stripe
2244 * Any reads will already have been scheduled, so we just see if enough data 3017 * Any reads will already have been scheduled, so we just see if enough
2245 * is available 3018 * data is available
2246 */ 3019 */
2247 if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { 3020 if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
2248 int update_p = 0, update_q = 0; 3021 handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
2249 struct r5dev *dev;
2250
2251 set_bit(STRIPE_HANDLE, &sh->state);
2252
2253 BUG_ON(failed>2);
2254 BUG_ON(uptodate < disks);
2255 /* Want to check and possibly repair P and Q.
2256 * However there could be one 'failed' device, in which
2257 * case we can only check one of them, possibly using the
2258 * other to generate missing data
2259 */
2260
2261 /* If !tmp_page, we cannot do the calculations,
2262 * but as we have set STRIPE_HANDLE, we will soon be called
2263 * by stripe_handle with a tmp_page - just wait until then.
2264 */
2265 if (tmp_page) {
2266 if (failed == q_failed) {
2267 /* The only possible failed device holds 'Q', so it makes
2268 * sense to check P (If anything else were failed, we would
2269 * have used P to recreate it).
2270 */
2271 compute_block_1(sh, pd_idx, 1);
2272 if (!page_is_zero(sh->dev[pd_idx].page)) {
2273 compute_block_1(sh,pd_idx,0);
2274 update_p = 1;
2275 }
2276 }
2277 if (!q_failed && failed < 2) {
2278 /* q is not failed, and we didn't use it to generate
2279 * anything, so it makes sense to check it
2280 */
2281 memcpy(page_address(tmp_page),
2282 page_address(sh->dev[qd_idx].page),
2283 STRIPE_SIZE);
2284 compute_parity6(sh, UPDATE_PARITY);
2285 if (memcmp(page_address(tmp_page),
2286 page_address(sh->dev[qd_idx].page),
2287 STRIPE_SIZE)!= 0) {
2288 clear_bit(STRIPE_INSYNC, &sh->state);
2289 update_q = 1;
2290 }
2291 }
2292 if (update_p || update_q) {
2293 conf->mddev->resync_mismatches += STRIPE_SECTORS;
2294 if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
2295 /* don't try to repair!! */
2296 update_p = update_q = 0;
2297 }
2298
2299 /* now write out any block on a failed drive,
2300 * or P or Q if they need it
2301 */
2302 3022
2303 if (failed == 2) { 3023 if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
2304 dev = &sh->dev[failed_num[1]];
2305 locked++;
2306 set_bit(R5_LOCKED, &dev->flags);
2307 set_bit(R5_Wantwrite, &dev->flags);
2308 }
2309 if (failed >= 1) {
2310 dev = &sh->dev[failed_num[0]];
2311 locked++;
2312 set_bit(R5_LOCKED, &dev->flags);
2313 set_bit(R5_Wantwrite, &dev->flags);
2314 }
2315
2316 if (update_p) {
2317 dev = &sh->dev[pd_idx];
2318 locked ++;
2319 set_bit(R5_LOCKED, &dev->flags);
2320 set_bit(R5_Wantwrite, &dev->flags);
2321 }
2322 if (update_q) {
2323 dev = &sh->dev[qd_idx];
2324 locked++;
2325 set_bit(R5_LOCKED, &dev->flags);
2326 set_bit(R5_Wantwrite, &dev->flags);
2327 }
2328 clear_bit(STRIPE_DEGRADED, &sh->state);
2329
2330 set_bit(STRIPE_INSYNC, &sh->state);
2331 }
2332 }
2333
2334 if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
2335 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 3024 md_done_sync(conf->mddev, STRIPE_SECTORS,1);
2336 clear_bit(STRIPE_SYNCING, &sh->state); 3025 clear_bit(STRIPE_SYNCING, &sh->state);
2337 } 3026 }
@@ -2339,9 +3028,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2339 /* If the failed drives are just a ReadError, then we might need 3028 /* If the failed drives are just a ReadError, then we might need
2340 * to progress the repair/check process 3029 * to progress the repair/check process
2341 */ 3030 */
2342 if (failed <= 2 && ! conf->mddev->ro) 3031 if (s.failed <= 2 && !conf->mddev->ro)
2343 for (i=0; i<failed;i++) { 3032 for (i = 0; i < s.failed; i++) {
2344 dev = &sh->dev[failed_num[i]]; 3033 dev = &sh->dev[r6s.failed_num[i]];
2345 if (test_bit(R5_ReadError, &dev->flags) 3034 if (test_bit(R5_ReadError, &dev->flags)
2346 && !test_bit(R5_LOCKED, &dev->flags) 3035 && !test_bit(R5_LOCKED, &dev->flags)
2347 && test_bit(R5_UPTODATE, &dev->flags) 3036 && test_bit(R5_UPTODATE, &dev->flags)
@@ -2358,7 +3047,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2358 } 3047 }
2359 } 3048 }
2360 3049
2361 if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3050 if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
2362 /* Need to write out all blocks after computing P&Q */ 3051 /* Need to write out all blocks after computing P&Q */
2363 sh->disks = conf->raid_disks; 3052 sh->disks = conf->raid_disks;
2364 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 3053 sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
@@ -2366,82 +3055,24 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2366 compute_parity6(sh, RECONSTRUCT_WRITE); 3055 compute_parity6(sh, RECONSTRUCT_WRITE);
2367 for (i = conf->raid_disks ; i-- ; ) { 3056 for (i = conf->raid_disks ; i-- ; ) {
2368 set_bit(R5_LOCKED, &sh->dev[i].flags); 3057 set_bit(R5_LOCKED, &sh->dev[i].flags);
2369 locked++; 3058 s.locked++;
2370 set_bit(R5_Wantwrite, &sh->dev[i].flags); 3059 set_bit(R5_Wantwrite, &sh->dev[i].flags);
2371 } 3060 }
2372 clear_bit(STRIPE_EXPANDING, &sh->state); 3061 clear_bit(STRIPE_EXPANDING, &sh->state);
2373 } else if (expanded) { 3062 } else if (s.expanded) {
2374 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3063 clear_bit(STRIPE_EXPAND_READY, &sh->state);
2375 atomic_dec(&conf->reshape_stripes); 3064 atomic_dec(&conf->reshape_stripes);
2376 wake_up(&conf->wait_for_overlap); 3065 wake_up(&conf->wait_for_overlap);
2377 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 3066 md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
2378 } 3067 }
2379 3068
2380 if (expanding && locked == 0) { 3069 if (s.expanding && s.locked == 0)
2381 /* We have read all the blocks in this stripe and now we need to 3070 handle_stripe_expansion(conf, sh, &r6s);
2382 * copy some of them into a target stripe for expand.
2383 */
2384 clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
2385 for (i = 0; i < sh->disks ; i++)
2386 if (i != pd_idx && i != qd_idx) {
2387 int dd_idx2, pd_idx2, j;
2388 struct stripe_head *sh2;
2389
2390 sector_t bn = compute_blocknr(sh, i);
2391 sector_t s = raid5_compute_sector(
2392 bn, conf->raid_disks,
2393 conf->raid_disks - conf->max_degraded,
2394 &dd_idx2, &pd_idx2, conf);
2395 sh2 = get_active_stripe(conf, s,
2396 conf->raid_disks,
2397 pd_idx2, 1);
2398 if (sh2 == NULL)
2399 /* so for only the early blocks of
2400 * this stripe have been requests.
2401 * When later blocks get requests, we
2402 * will try again
2403 */
2404 continue;
2405 if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
2406 test_bit(R5_Expanded,
2407 &sh2->dev[dd_idx2].flags)) {
2408 /* must have already done this block */
2409 release_stripe(sh2);
2410 continue;
2411 }
2412 memcpy(page_address(sh2->dev[dd_idx2].page),
2413 page_address(sh->dev[i].page),
2414 STRIPE_SIZE);
2415 set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags);
2416 set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags);
2417 for (j = 0 ; j < conf->raid_disks ; j++)
2418 if (j != sh2->pd_idx &&
2419 j != raid6_next_disk(sh2->pd_idx,
2420 sh2->disks) &&
2421 !test_bit(R5_Expanded,
2422 &sh2->dev[j].flags))
2423 break;
2424 if (j == conf->raid_disks) {
2425 set_bit(STRIPE_EXPAND_READY,
2426 &sh2->state);
2427 set_bit(STRIPE_HANDLE, &sh2->state);
2428 }
2429 release_stripe(sh2);
2430 }
2431 }
2432 3071
2433 spin_unlock(&sh->lock); 3072 spin_unlock(&sh->lock);
2434 3073
2435 while ((bi=return_bi)) { 3074 return_io(return_bi);
2436 int bytes = bi->bi_size;
2437 3075
2438 return_bi = bi->bi_next;
2439 bi->bi_next = NULL;
2440 bi->bi_size = 0;
2441 bi->bi_end_io(bi, bytes,
2442 test_bit(BIO_UPTODATE, &bi->bi_flags)
2443 ? 0 : -EIO);
2444 }
2445 for (i=disks; i-- ;) { 3076 for (i=disks; i-- ;) {
2446 int rw; 3077 int rw;
2447 struct bio *bi; 3078 struct bio *bi;
@@ -2470,11 +3101,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2470 rcu_read_unlock(); 3101 rcu_read_unlock();
2471 3102
2472 if (rdev) { 3103 if (rdev) {
2473 if (syncing || expanding || expanded) 3104 if (s.syncing || s.expanding || s.expanded)
2474 md_sync_acct(rdev->bdev, STRIPE_SECTORS); 3105 md_sync_acct(rdev->bdev, STRIPE_SECTORS);
2475 3106
2476 bi->bi_bdev = rdev->bdev; 3107 bi->bi_bdev = rdev->bdev;
2477 PRINTK("for %llu schedule op %ld on disc %d\n", 3108 pr_debug("for %llu schedule op %ld on disc %d\n",
2478 (unsigned long long)sh->sector, bi->bi_rw, i); 3109 (unsigned long long)sh->sector, bi->bi_rw, i);
2479 atomic_inc(&sh->count); 3110 atomic_inc(&sh->count);
2480 bi->bi_sector = sh->sector + rdev->data_offset; 3111 bi->bi_sector = sh->sector + rdev->data_offset;
@@ -2494,7 +3125,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2494 } else { 3125 } else {
2495 if (rw == WRITE) 3126 if (rw == WRITE)
2496 set_bit(STRIPE_DEGRADED, &sh->state); 3127 set_bit(STRIPE_DEGRADED, &sh->state);
2497 PRINTK("skip op %ld on disc %d for sector %llu\n", 3128 pr_debug("skip op %ld on disc %d for sector %llu\n",
2498 bi->bi_rw, i, (unsigned long long)sh->sector); 3129 bi->bi_rw, i, (unsigned long long)sh->sector);
2499 clear_bit(R5_LOCKED, &sh->dev[i].flags); 3130 clear_bit(R5_LOCKED, &sh->dev[i].flags);
2500 set_bit(STRIPE_HANDLE, &sh->state); 3131 set_bit(STRIPE_HANDLE, &sh->state);
@@ -2738,7 +3369,7 @@ static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error)
2738 } 3369 }
2739 3370
2740 3371
2741 PRINTK("raid5_align_endio : io error...handing IO for a retry\n"); 3372 pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
2742 3373
2743 add_bio_to_retry(raid_bi, conf); 3374 add_bio_to_retry(raid_bi, conf);
2744 return 0; 3375 return 0;
@@ -2776,7 +3407,7 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
2776 mdk_rdev_t *rdev; 3407 mdk_rdev_t *rdev;
2777 3408
2778 if (!in_chunk_boundary(mddev, raid_bio)) { 3409 if (!in_chunk_boundary(mddev, raid_bio)) {
2779 PRINTK("chunk_aligned_read : non aligned\n"); 3410 pr_debug("chunk_aligned_read : non aligned\n");
2780 return 0; 3411 return 0;
2781 } 3412 }
2782 /* 3413 /*
@@ -2900,7 +3531,7 @@ static int make_request(request_queue_t *q, struct bio * bi)
2900 3531
2901 new_sector = raid5_compute_sector(logical_sector, disks, data_disks, 3532 new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
2902 &dd_idx, &pd_idx, conf); 3533 &dd_idx, &pd_idx, conf);
2903 PRINTK("raid5: make_request, sector %llu logical %llu\n", 3534 pr_debug("raid5: make_request, sector %llu logical %llu\n",
2904 (unsigned long long)new_sector, 3535 (unsigned long long)new_sector,
2905 (unsigned long long)logical_sector); 3536 (unsigned long long)logical_sector);
2906 3537
@@ -3273,7 +3904,7 @@ static void raid5d (mddev_t *mddev)
3273 raid5_conf_t *conf = mddev_to_conf(mddev); 3904 raid5_conf_t *conf = mddev_to_conf(mddev);
3274 int handled; 3905 int handled;
3275 3906
3276 PRINTK("+++ raid5d active\n"); 3907 pr_debug("+++ raid5d active\n");
3277 3908
3278 md_check_recovery(mddev); 3909 md_check_recovery(mddev);
3279 3910
@@ -3308,8 +3939,10 @@ static void raid5d (mddev_t *mddev)
3308 handled++; 3939 handled++;
3309 } 3940 }
3310 3941
3311 if (list_empty(&conf->handle_list)) 3942 if (list_empty(&conf->handle_list)) {
3943 async_tx_issue_pending_all();
3312 break; 3944 break;
3945 }
3313 3946
3314 first = conf->handle_list.next; 3947 first = conf->handle_list.next;
3315 sh = list_entry(first, struct stripe_head, lru); 3948 sh = list_entry(first, struct stripe_head, lru);
@@ -3325,13 +3958,13 @@ static void raid5d (mddev_t *mddev)
3325 3958
3326 spin_lock_irq(&conf->device_lock); 3959 spin_lock_irq(&conf->device_lock);
3327 } 3960 }
3328 PRINTK("%d stripes handled\n", handled); 3961 pr_debug("%d stripes handled\n", handled);
3329 3962
3330 spin_unlock_irq(&conf->device_lock); 3963 spin_unlock_irq(&conf->device_lock);
3331 3964
3332 unplug_slaves(mddev); 3965 unplug_slaves(mddev);
3333 3966
3334 PRINTK("--- raid5d inactive\n"); 3967 pr_debug("--- raid5d inactive\n");
3335} 3968}
3336 3969
3337static ssize_t 3970static ssize_t
@@ -3507,7 +4140,7 @@ static int run(mddev_t *mddev)
3507 atomic_set(&conf->preread_active_stripes, 0); 4140 atomic_set(&conf->preread_active_stripes, 0);
3508 atomic_set(&conf->active_aligned_reads, 0); 4141 atomic_set(&conf->active_aligned_reads, 0);
3509 4142
3510 PRINTK("raid5: run(%s) called.\n", mdname(mddev)); 4143 pr_debug("raid5: run(%s) called.\n", mdname(mddev));
3511 4144
3512 ITERATE_RDEV(mddev,rdev,tmp) { 4145 ITERATE_RDEV(mddev,rdev,tmp) {
3513 raid_disk = rdev->raid_disk; 4146 raid_disk = rdev->raid_disk;
@@ -3690,7 +4323,7 @@ static int stop(mddev_t *mddev)
3690 return 0; 4323 return 0;
3691} 4324}
3692 4325
3693#if RAID5_DEBUG 4326#ifdef DEBUG
3694static void print_sh (struct seq_file *seq, struct stripe_head *sh) 4327static void print_sh (struct seq_file *seq, struct stripe_head *sh)
3695{ 4328{
3696 int i; 4329 int i;
@@ -3737,7 +4370,7 @@ static void status (struct seq_file *seq, mddev_t *mddev)
3737 conf->disks[i].rdev && 4370 conf->disks[i].rdev &&
3738 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); 4371 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
3739 seq_printf (seq, "]"); 4372 seq_printf (seq, "]");
3740#if RAID5_DEBUG 4373#ifdef DEBUG
3741 seq_printf (seq, "\n"); 4374 seq_printf (seq, "\n");
3742 printall(seq, conf); 4375 printall(seq, conf);
3743#endif 4376#endif
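
Note on the raid5d() hunk above: stripes queued on conf->handle_list are processed through async_tx operations, and async_tx_issue_pending_all() is called only once the list empties, so every DMA channel flushes its batched descriptors in one go. A minimal sketch of that submit-then-flush pattern, assuming the async_memcpy() prototype added elsewhere in this series (crypto/async_tx/async_memcpy.c); the helper and page arrays below are illustrative, not code from the patch:

#include <linux/async_tx.h>
#include <linux/mm.h>

/* Queue a batch of offloaded page copies, then kick the channels once.
 * ASYNC_TX_ACK marks each descriptor as not depended upon (see the flag
 * documentation in include/linux/async_tx.h further down in this diff).
 */
static void copy_batch(struct page **dst, struct page **src, int cnt)
{
	int i;

	for (i = 0; i < cnt; i++)
		async_memcpy(dst[i], src[i], 0, 0, PAGE_SIZE,
			     ASYNC_TX_ACK, NULL, NULL, NULL);

	async_tx_issue_pending_all();	/* same call raid5d() now makes */
}

Issuing once per batch rather than per descriptor matches the driver side: the @pending counter and IOP_ADMA_THRESHOLD in iop_adma.h below exist to coalesce exactly these appends.
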
diff --git a/include/asm-arm/arch-iop13xx/adma.h b/include/asm-arm/arch-iop13xx/adma.h
new file mode 100644
index 000000000000..04006c1c5fd7
--- /dev/null
+++ b/include/asm-arm/arch-iop13xx/adma.h
@@ -0,0 +1,544 @@
1/*
2 * Copyright(c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 */
18#ifndef _ADMA_H
19#define _ADMA_H
20#include <linux/types.h>
21#include <linux/io.h>
22#include <asm/hardware.h>
23#include <asm/hardware/iop_adma.h>
24
25#define ADMA_ACCR(chan) (chan->mmr_base + 0x0)
26#define ADMA_ACSR(chan) (chan->mmr_base + 0x4)
27#define ADMA_ADAR(chan) (chan->mmr_base + 0x8)
28#define ADMA_IIPCR(chan) (chan->mmr_base + 0x18)
29#define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c)
30#define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20)
31#define ADMA_ANDAR(chan) (chan->mmr_base + 0x24)
32#define ADMA_ADCR(chan) (chan->mmr_base + 0x28)
33#define ADMA_CARMD(chan) (chan->mmr_base + 0x2c)
34#define ADMA_ABCR(chan) (chan->mmr_base + 0x30)
35#define ADMA_DLADR(chan) (chan->mmr_base + 0x34)
36#define ADMA_DUADR(chan) (chan->mmr_base + 0x38)
37#define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3)))
38#define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3)))
39
40struct iop13xx_adma_src {
41 u32 src_addr;
42 union {
43 u32 upper_src_addr;
44 struct {
45 unsigned int pq_upper_src_addr:24;
46 unsigned int pq_dmlt:8;
47 };
48 };
49};
50
51struct iop13xx_adma_desc_ctrl {
52 unsigned int int_en:1;
53 unsigned int xfer_dir:2;
54 unsigned int src_select:4;
55 unsigned int zero_result:1;
56 unsigned int block_fill_en:1;
57 unsigned int crc_gen_en:1;
58 unsigned int crc_xfer_dis:1;
59 unsigned int crc_seed_fetch_dis:1;
60 unsigned int status_write_back_en:1;
61 unsigned int endian_swap_en:1;
62 unsigned int reserved0:2;
63 unsigned int pq_update_xfer_en:1;
64 unsigned int dual_xor_en:1;
65 unsigned int pq_xfer_en:1;
66 unsigned int p_xfer_dis:1;
67 unsigned int reserved1:10;
68 unsigned int relax_order_en:1;
69 unsigned int no_snoop_en:1;
70};
71
72struct iop13xx_adma_byte_count {
73 unsigned int byte_count:24;
74 unsigned int host_if:3;
75 unsigned int reserved:2;
76 unsigned int zero_result_err_q:1;
77 unsigned int zero_result_err:1;
78 unsigned int tx_complete:1;
79};
80
81struct iop13xx_adma_desc_hw {
82 u32 next_desc;
83 union {
84 u32 desc_ctrl;
85 struct iop13xx_adma_desc_ctrl desc_ctrl_field;
86 };
87 union {
88 u32 crc_addr;
89 u32 block_fill_data;
90 u32 q_dest_addr;
91 };
92 union {
93 u32 byte_count;
94 struct iop13xx_adma_byte_count byte_count_field;
95 };
96 union {
97 u32 dest_addr;
98 u32 p_dest_addr;
99 };
100 union {
101 u32 upper_dest_addr;
102 u32 pq_upper_dest_addr;
103 };
104 struct iop13xx_adma_src src[1];
105};
106
107struct iop13xx_adma_desc_dual_xor {
108 u32 next_desc;
109 u32 desc_ctrl;
110 u32 reserved;
111 u32 byte_count;
112 u32 h_dest_addr;
113 u32 h_upper_dest_addr;
114 u32 src0_addr;
115 u32 upper_src0_addr;
116 u32 src1_addr;
117 u32 upper_src1_addr;
118 u32 h_src_addr;
119 u32 h_upper_src_addr;
120 u32 d_src_addr;
121 u32 d_upper_src_addr;
122 u32 d_dest_addr;
123 u32 d_upper_dest_addr;
124};
125
126struct iop13xx_adma_desc_pq_update {
127 u32 next_desc;
128 u32 desc_ctrl;
129 u32 reserved;
130 u32 byte_count;
131 u32 p_dest_addr;
132 u32 p_upper_dest_addr;
133 u32 src0_addr;
134 u32 upper_src0_addr;
135 u32 src1_addr;
136 u32 upper_src1_addr;
137 u32 p_src_addr;
138 u32 p_upper_src_addr;
139 u32 q_src_addr;
140 struct {
141 unsigned int q_upper_src_addr:24;
142 unsigned int q_dmlt:8;
143 };
144 u32 q_dest_addr;
145 u32 q_upper_dest_addr;
146};
147
148static inline int iop_adma_get_max_xor(void)
149{
150 return 16;
151}
152
153static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
154{
155 return __raw_readl(ADMA_ADAR(chan));
156}
157
158static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
159 u32 next_desc_addr)
160{
161 __raw_writel(next_desc_addr, ADMA_ANDAR(chan));
162}
163
164#define ADMA_STATUS_BUSY (1 << 13)
165
166static inline char iop_chan_is_busy(struct iop_adma_chan *chan)
167{
168 if (__raw_readl(ADMA_ACSR(chan)) &
169 ADMA_STATUS_BUSY)
170 return 1;
171 else
172 return 0;
173}
174
175static inline int
176iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots)
177{
178 return 1;
179}
180#define iop_desc_is_aligned(x, y) 1
181
182static inline int
183iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
184{
185 *slots_per_op = 1;
186 return 1;
187}
188
189#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s)
190
191static inline int
192iop_chan_memset_slot_count(size_t len, int *slots_per_op)
193{
194 *slots_per_op = 1;
195 return 1;
196}
197
198static inline int
199iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
200{
201 int num_slots;
202 /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1
203 * (1 source => 8 bytes) (1 slot => 32 bytes)
204 */
205 num_slots = 1 + (((src_cnt - 1) << 3) >> 5);
206 if (((src_cnt - 1) << 3) & 0x1f)
207 num_slots++;
208
209 *slots_per_op = num_slots;
210
211 return num_slots;
212}
213
214#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
215#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
216#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
217#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
218#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
219
220static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
221 struct iop_adma_chan *chan)
222{
223 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
224 return hw_desc->dest_addr;
225}
226
227static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
228 struct iop_adma_chan *chan)
229{
230 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
231 return hw_desc->byte_count_field.byte_count;
232}
233
234static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
235 struct iop_adma_chan *chan,
236 int src_idx)
237{
238 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
239 return hw_desc->src[src_idx].src_addr;
240}
241
242static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc,
243 struct iop_adma_chan *chan)
244{
245 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
246 return hw_desc->desc_ctrl_field.src_select + 1;
247}
248
249static inline void
250iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
251{
252 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
253 union {
254 u32 value;
255 struct iop13xx_adma_desc_ctrl field;
256 } u_desc_ctrl;
257
258 u_desc_ctrl.value = 0;
259 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
260 u_desc_ctrl.field.int_en = int_en;
261 hw_desc->desc_ctrl = u_desc_ctrl.value;
262 hw_desc->crc_addr = 0;
263}
264
265static inline void
266iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
267{
268 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
269 union {
270 u32 value;
271 struct iop13xx_adma_desc_ctrl field;
272 } u_desc_ctrl;
273
274 u_desc_ctrl.value = 0;
275 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
276 u_desc_ctrl.field.block_fill_en = 1;
277 u_desc_ctrl.field.int_en = int_en;
278 hw_desc->desc_ctrl = u_desc_ctrl.value;
279 hw_desc->crc_addr = 0;
280}
281
282/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
283static inline void
284iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
285{
286 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
287 union {
288 u32 value;
289 struct iop13xx_adma_desc_ctrl field;
290 } u_desc_ctrl;
291
292 u_desc_ctrl.value = 0;
293 u_desc_ctrl.field.src_select = src_cnt - 1;
294 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
295 u_desc_ctrl.field.int_en = int_en;
296 hw_desc->desc_ctrl = u_desc_ctrl.value;
297 hw_desc->crc_addr = 0;
298
299}
300#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i)
301
302/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
303static inline int
304iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
305{
306 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
307 union {
308 u32 value;
309 struct iop13xx_adma_desc_ctrl field;
310 } u_desc_ctrl;
311
312 u_desc_ctrl.value = 0;
313 u_desc_ctrl.field.src_select = src_cnt - 1;
314 u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
315 u_desc_ctrl.field.zero_result = 1;
316 u_desc_ctrl.field.status_write_back_en = 1;
317 u_desc_ctrl.field.int_en = int_en;
318 hw_desc->desc_ctrl = u_desc_ctrl.value;
319 hw_desc->crc_addr = 0;
320
321 return 1;
322}
323
324static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
325 struct iop_adma_chan *chan,
326 u32 byte_count)
327{
328 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
329 hw_desc->byte_count = byte_count;
330}
331
332static inline void
333iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
334{
335 int slots_per_op = desc->slots_per_op;
336 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
337 int i = 0;
338
339 if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
340 hw_desc->byte_count = len;
341 } else {
342 do {
343 iter = iop_hw_desc_slot_idx(hw_desc, i);
344 iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
345 len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
346 i += slots_per_op;
347 } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
348
349 if (len) {
350 iter = iop_hw_desc_slot_idx(hw_desc, i);
351 iter->byte_count = len;
352 }
353 }
354}
355
356
357static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
358 struct iop_adma_chan *chan,
359 dma_addr_t addr)
360{
361 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
362 hw_desc->dest_addr = addr;
363 hw_desc->upper_dest_addr = 0;
364}
365
366static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
367 dma_addr_t addr)
368{
369 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
370 hw_desc->src[0].src_addr = addr;
371 hw_desc->src[0].upper_src_addr = 0;
372}
373
374static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
375 int src_idx, dma_addr_t addr)
376{
377 int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
378 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
379 int i = 0;
380
381 do {
382 iter = iop_hw_desc_slot_idx(hw_desc, i);
383 iter->src[src_idx].src_addr = addr;
384 iter->src[src_idx].upper_src_addr = 0;
385 slot_cnt -= slots_per_op;
386 if (slot_cnt) {
387 i += slots_per_op;
388 addr += IOP_ADMA_XOR_MAX_BYTE_COUNT;
389 }
390 } while (slot_cnt);
391}
392
393static inline void
394iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
395 struct iop_adma_chan *chan)
396{
397 iop_desc_init_memcpy(desc, 1);
398 iop_desc_set_byte_count(desc, chan, 0);
399 iop_desc_set_dest_addr(desc, chan, 0);
400 iop_desc_set_memcpy_src_addr(desc, 0);
401}
402
403#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
404
405static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
406 u32 next_desc_addr)
407{
408 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
409 BUG_ON(hw_desc->next_desc);
410 hw_desc->next_desc = next_desc_addr;
411}
412
413static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
414{
415 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
416 return hw_desc->next_desc;
417}
418
419static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
420{
421 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
422 hw_desc->next_desc = 0;
423}
424
425static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
426 u32 val)
427{
428 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
429 hw_desc->block_fill_data = val;
430}
431
432static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
433{
434 struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
435 struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
436 struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
437
438 BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
439
440 if (desc_ctrl.pq_xfer_en)
441 return byte_count.zero_result_err_q;
442 else
443 return byte_count.zero_result_err;
444}
445
446static inline void iop_chan_append(struct iop_adma_chan *chan)
447{
448 u32 adma_accr;
449
450 adma_accr = __raw_readl(ADMA_ACCR(chan));
451 adma_accr |= 0x2;
452 __raw_writel(adma_accr, ADMA_ACCR(chan));
453}
454
455static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
456{
457 do { } while (0);
458}
459
460static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
461{
462 return __raw_readl(ADMA_ACSR(chan));
463}
464
465static inline void iop_chan_disable(struct iop_adma_chan *chan)
466{
467 u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
468 adma_chan_ctrl &= ~0x1;
469 __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
470}
471
472static inline void iop_chan_enable(struct iop_adma_chan *chan)
473{
474 u32 adma_chan_ctrl;
475
476 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
477 adma_chan_ctrl |= 0x1;
478 __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
479}
480
481static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
482{
483 u32 status = __raw_readl(ADMA_ACSR(chan));
484 status &= (1 << 12);
485 __raw_writel(status, ADMA_ACSR(chan));
486}
487
488static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
489{
490 u32 status = __raw_readl(ADMA_ACSR(chan));
491 status &= (1 << 11);
492 __raw_writel(status, ADMA_ACSR(chan));
493}
494
495static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
496{
497 u32 status = __raw_readl(ADMA_ACSR(chan));
498 status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3);
499 __raw_writel(status, ADMA_ACSR(chan));
500}
501
502static inline int
503iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
504{
505 return test_bit(9, &status);
506}
507
508static inline int
509iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
510{
511 return test_bit(5, &status);
512}
513
514static inline int
515iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
516{
517 return test_bit(4, &status);
518}
519
520static inline int
521iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
522{
523 return test_bit(3, &status);
524}
525
526static inline int
527iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
528{
529 return 0;
530}
531
532static inline int
533iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
534{
535 return 0;
536}
537
538static inline int
539iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
540{
541 return 0;
542}
543
544#endif /* _ADMA_H */
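
The slot accounting in iop_chan_xor_slot_count() above packs the descriptor header plus the first 8-byte source into one 32-byte slot and four further sources into each additional slot. A standalone userspace check of that arithmetic; the function body repeats the math from the header above with the kernel types dropped:

#include <stdio.h>

/* Same math as iop_chan_xor_slot_count(): 1 slot for the descriptor plus
 * one slot per four extra 8-byte sources, rounded up.
 */
static int iop13xx_xor_slot_count(int src_cnt)
{
	int num_slots = 1 + (((src_cnt - 1) << 3) >> 5);

	if (((src_cnt - 1) << 3) & 0x1f)
		num_slots++;
	return num_slots;
}

int main(void)
{
	/* 5 sources: the 32 extra bytes fill exactly one more slot -> 2 */
	printf("%d\n", iop13xx_xor_slot_count(5));	/* prints 2 */
	/* 16 sources (iop_adma_get_max_xor()): 1 + (120 >> 5) = 4, plus one
	 * more because 15 * 8 = 120 is not a multiple of 32 -> 5 slots
	 */
	printf("%d\n", iop13xx_xor_slot_count(16));	/* prints 5 */
	return 0;
}
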
diff --git a/include/asm-arm/arch-iop13xx/iop13xx.h b/include/asm-arm/arch-iop13xx/iop13xx.h
index e6736c3d1f7f..d4e4f828577c 100644
--- a/include/asm-arm/arch-iop13xx/iop13xx.h
+++ b/include/asm-arm/arch-iop13xx/iop13xx.h
@@ -166,12 +166,22 @@ static inline int iop13xx_cpu_id(void)
166#define IOP13XX_INIT_I2C_1 (1 << 1) 166#define IOP13XX_INIT_I2C_1 (1 << 1)
167#define IOP13XX_INIT_I2C_2 (1 << 2) 167#define IOP13XX_INIT_I2C_2 (1 << 2)
168 168
169#define IQ81340_NUM_UART 2 169/* ADMA selection flags */
170#define IQ81340_NUM_I2C 3 170/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */
171#define IQ81340_NUM_PHYS_MAP_FLASH 1 171#define IOP13XX_INIT_ADMA_DEFAULT (0)
172#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\ 172#define IOP13XX_INIT_ADMA_0 (1 << 0)
173 IQ81340_NUM_I2C +\ 173#define IOP13XX_INIT_ADMA_1 (1 << 1)
174 IQ81340_NUM_PHYS_MAP_FLASH) 174#define IOP13XX_INIT_ADMA_2 (1 << 2)
175
176/* Platform devices */
177#define IQ81340_NUM_UART 2
178#define IQ81340_NUM_I2C 3
179#define IQ81340_NUM_PHYS_MAP_FLASH 1
180#define IQ81340_NUM_ADMA 3
181#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \
182 IQ81340_NUM_I2C + \
183 IQ81340_NUM_PHYS_MAP_FLASH + \
184 IQ81340_NUM_ADMA)
175 185
176/*========================== PMMR offsets for key registers ============*/ 186/*========================== PMMR offsets for key registers ============*/
177#define IOP13XX_ATU0_PMMR_OFFSET 0x00048000 187#define IOP13XX_ATU0_PMMR_OFFSET 0x00048000
@@ -444,22 +454,6 @@ static inline int iop13xx_cpu_id(void)
444/*==============================ADMA UNITS===============================*/ 454/*==============================ADMA UNITS===============================*/
445#define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9)) 455#define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9))
446#define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0) 456#define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0)
447#define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs))
448
449#define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0)
450#define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4)
451#define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8)
452#define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18)
453#define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c)
454#define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20)
455#define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24)
456#define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28)
457#define IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c)
458#define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30)
459#define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34)
460#define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38)
461#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3))
462#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3))
463 457
464/*==============================XSI BRIDGE===============================*/ 458/*==============================XSI BRIDGE===============================*/
465#define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c) 459#define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c)
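
The new IOP13XX_INIT_ADMA_* values mirror the existing IOP13XX_INIT_UART_*/IOP13XX_INIT_I2C_* masks: a zero mask (IOP13XX_INIT_ADMA_DEFAULT) defers to the CONFIG_IOP13XX_ADMA* Kconfig selection, while individual bits force specific channels on. A hedged one-liner to illustrate the intended use; the variable name is made up here, the real consumer is the iop13xx setup code added elsewhere in this merge:

/* force-enable ADMA channels 0 and 1, regardless of the Kconfig default */
unsigned long iop13xx_adma_mask = IOP13XX_INIT_ADMA_0 | IOP13XX_INIT_ADMA_1;
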
diff --git a/include/asm-arm/arch-iop32x/adma.h b/include/asm-arm/arch-iop32x/adma.h
new file mode 100644
index 000000000000..5ed92037dd10
--- /dev/null
+++ b/include/asm-arm/arch-iop32x/adma.h
@@ -0,0 +1,5 @@
1#ifndef IOP32X_ADMA_H
2#define IOP32X_ADMA_H
3#include <asm/hardware/iop3xx-adma.h>
4#endif
5
diff --git a/include/asm-arm/arch-iop33x/adma.h b/include/asm-arm/arch-iop33x/adma.h
new file mode 100644
index 000000000000..4b92f795f90e
--- /dev/null
+++ b/include/asm-arm/arch-iop33x/adma.h
@@ -0,0 +1,5 @@
1#ifndef IOP33X_ADMA_H
2#define IOP33X_ADMA_H
3#include <asm/hardware/iop3xx-adma.h>
4#endif
5
diff --git a/include/asm-arm/hardware/iop3xx-adma.h b/include/asm-arm/hardware/iop3xx-adma.h
new file mode 100644
index 000000000000..10834b54f681
--- /dev/null
+++ b/include/asm-arm/hardware/iop3xx-adma.h
@@ -0,0 +1,892 @@
1/*
2 * Copyright © 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 */
18#ifndef _ADMA_H
19#define _ADMA_H
20#include <linux/types.h>
21#include <linux/io.h>
22#include <asm/hardware.h>
23#include <asm/hardware/iop_adma.h>
24
25/* Memory copy units */
26#define DMA_CCR(chan) (chan->mmr_base + 0x0)
27#define DMA_CSR(chan) (chan->mmr_base + 0x4)
28#define DMA_DAR(chan) (chan->mmr_base + 0xc)
29#define DMA_NDAR(chan) (chan->mmr_base + 0x10)
30#define DMA_PADR(chan) (chan->mmr_base + 0x14)
31#define DMA_PUADR(chan) (chan->mmr_base + 0x18)
32#define DMA_LADR(chan) (chan->mmr_base + 0x1c)
33#define DMA_BCR(chan) (chan->mmr_base + 0x20)
34#define DMA_DCR(chan) (chan->mmr_base + 0x24)
35
36/* Application accelerator unit */
37#define AAU_ACR(chan) (chan->mmr_base + 0x0)
38#define AAU_ASR(chan) (chan->mmr_base + 0x4)
39#define AAU_ADAR(chan) (chan->mmr_base + 0x8)
40#define AAU_ANDAR(chan) (chan->mmr_base + 0xc)
41#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2)))
42#define AAU_DAR(chan) (chan->mmr_base + 0x20)
43#define AAU_ABCR(chan) (chan->mmr_base + 0x24)
44#define AAU_ADCR(chan) (chan->mmr_base + 0x28)
45#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
46#define AAU_EDCR0_IDX 8
47#define AAU_EDCR1_IDX 17
48#define AAU_EDCR2_IDX 26
49
50#define DMA0_ID 0
51#define DMA1_ID 1
52#define AAU_ID 2
53
54struct iop3xx_aau_desc_ctrl {
55 unsigned int int_en:1;
56 unsigned int blk1_cmd_ctrl:3;
57 unsigned int blk2_cmd_ctrl:3;
58 unsigned int blk3_cmd_ctrl:3;
59 unsigned int blk4_cmd_ctrl:3;
60 unsigned int blk5_cmd_ctrl:3;
61 unsigned int blk6_cmd_ctrl:3;
62 unsigned int blk7_cmd_ctrl:3;
63 unsigned int blk8_cmd_ctrl:3;
64 unsigned int blk_ctrl:2;
65 unsigned int dual_xor_en:1;
66 unsigned int tx_complete:1;
67 unsigned int zero_result_err:1;
68 unsigned int zero_result_en:1;
69 unsigned int dest_write_en:1;
70};
71
72struct iop3xx_aau_e_desc_ctrl {
73 unsigned int reserved:1;
74 unsigned int blk1_cmd_ctrl:3;
75 unsigned int blk2_cmd_ctrl:3;
76 unsigned int blk3_cmd_ctrl:3;
77 unsigned int blk4_cmd_ctrl:3;
78 unsigned int blk5_cmd_ctrl:3;
79 unsigned int blk6_cmd_ctrl:3;
80 unsigned int blk7_cmd_ctrl:3;
81 unsigned int blk8_cmd_ctrl:3;
82 unsigned int reserved2:7;
83};
84
85struct iop3xx_dma_desc_ctrl {
86 unsigned int pci_transaction:4;
87 unsigned int int_en:1;
88 unsigned int dac_cycle_en:1;
89 unsigned int mem_to_mem_en:1;
90 unsigned int crc_data_tx_en:1;
91 unsigned int crc_gen_en:1;
92 unsigned int crc_seed_dis:1;
93 unsigned int reserved:21;
94 unsigned int crc_tx_complete:1;
95};
96
97struct iop3xx_desc_dma {
98 u32 next_desc;
99 union {
100 u32 pci_src_addr;
101 u32 pci_dest_addr;
102 u32 src_addr;
103 };
104 union {
105 u32 upper_pci_src_addr;
106 u32 upper_pci_dest_addr;
107 };
108 union {
109 u32 local_pci_src_addr;
110 u32 local_pci_dest_addr;
111 u32 dest_addr;
112 };
113 u32 byte_count;
114 union {
115 u32 desc_ctrl;
116 struct iop3xx_dma_desc_ctrl desc_ctrl_field;
117 };
118 u32 crc_addr;
119};
120
121struct iop3xx_desc_aau {
122 u32 next_desc;
123 u32 src[4];
124 u32 dest_addr;
125 u32 byte_count;
126 union {
127 u32 desc_ctrl;
128 struct iop3xx_aau_desc_ctrl desc_ctrl_field;
129 };
130 union {
131 u32 src_addr;
132 u32 e_desc_ctrl;
133 struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
134 } src_edc[31];
135};
136
137struct iop3xx_aau_gfmr {
138 unsigned int gfmr1:8;
139 unsigned int gfmr2:8;
140 unsigned int gfmr3:8;
141 unsigned int gfmr4:8;
142};
143
144struct iop3xx_desc_pq_xor {
145 u32 next_desc;
146 u32 src[3];
147 union {
148 u32 data_mult1;
149 struct iop3xx_aau_gfmr data_mult1_field;
150 };
151 u32 dest_addr;
152 u32 byte_count;
153 union {
154 u32 desc_ctrl;
155 struct iop3xx_aau_desc_ctrl desc_ctrl_field;
156 };
157 union {
158 u32 src_addr;
159 u32 e_desc_ctrl;
160 struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
161 u32 data_multiplier;
162 struct iop3xx_aau_gfmr data_mult_field;
163 u32 reserved;
164 } src_edc_gfmr[19];
165};
166
167struct iop3xx_desc_dual_xor {
168 u32 next_desc;
169 u32 src0_addr;
170 u32 src1_addr;
171 u32 h_src_addr;
172 u32 d_src_addr;
173 u32 h_dest_addr;
174 u32 byte_count;
175 union {
176 u32 desc_ctrl;
177 struct iop3xx_aau_desc_ctrl desc_ctrl_field;
178 };
179 u32 d_dest_addr;
180};
181
182union iop3xx_desc {
183 struct iop3xx_desc_aau *aau;
184 struct iop3xx_desc_dma *dma;
185 struct iop3xx_desc_pq_xor *pq_xor;
186 struct iop3xx_desc_dual_xor *dual_xor;
187 void *ptr;
188};
189
190static inline int iop_adma_get_max_xor(void)
191{
192 return 32;
193}
194
195static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
196{
197 int id = chan->device->id;
198
199 switch (id) {
200 case DMA0_ID:
201 case DMA1_ID:
202 return __raw_readl(DMA_DAR(chan));
203 case AAU_ID:
204 return __raw_readl(AAU_ADAR(chan));
205 default:
206 BUG();
207 }
208 return 0;
209}
210
211static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
212 u32 next_desc_addr)
213{
214 int id = chan->device->id;
215
216 switch (id) {
217 case DMA0_ID:
218 case DMA1_ID:
219 __raw_writel(next_desc_addr, DMA_NDAR(chan));
220 break;
221 case AAU_ID:
222 __raw_writel(next_desc_addr, AAU_ANDAR(chan));
223 break;
224 }
225
226}
227
228#define IOP_ADMA_STATUS_BUSY (1 << 10)
229#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
230#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
231#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
232
233static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
234{
235 u32 status = __raw_readl(DMA_CSR(chan));
236 return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
237}
238
239static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
240 int num_slots)
241{
242 /* num_slots will only ever be 1, 2, 4, or 8 */
243 return (desc->idx & (num_slots - 1)) ? 0 : 1;
244}
245
246/* to do: support large (i.e. > hw max) buffer sizes */
247static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
248{
249 *slots_per_op = 1;
250 return 1;
251}
252
253/* to do: support large (i.e. > hw max) buffer sizes */
254static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
255{
256 *slots_per_op = 1;
257 return 1;
258}
259
260static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
261 int *slots_per_op)
262{
263	static const int slot_count_table[] = { 0,
264 1, 1, 1, 1, /* 01 - 04 */
265 2, 2, 2, 2, /* 05 - 08 */
266 4, 4, 4, 4, /* 09 - 12 */
267 4, 4, 4, 4, /* 13 - 16 */
268 8, 8, 8, 8, /* 17 - 20 */
269 8, 8, 8, 8, /* 21 - 24 */
270 8, 8, 8, 8, /* 25 - 28 */
271 8, 8, 8, 8, /* 29 - 32 */
272 };
273 *slots_per_op = slot_count_table[src_cnt];
274 return *slots_per_op;
275}
276
277static inline int
278iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
279{
280 switch (chan->device->id) {
281 case DMA0_ID:
282 case DMA1_ID:
283 return iop_chan_memcpy_slot_count(0, slots_per_op);
284 case AAU_ID:
285 return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
286 default:
287 BUG();
288 }
289 return 0;
290}
291
292static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
293 int *slots_per_op)
294{
295 int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
296
297 if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
298 return slot_cnt;
299
300 len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
301 while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
302 len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
303 slot_cnt += *slots_per_op;
304 }
305
306 if (len)
307 slot_cnt += *slots_per_op;
308
309 return slot_cnt;
310}
311
312/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
313 * descriptors
314 */
315static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
316 int *slots_per_op)
317{
318 int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
319
320 if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
321 return slot_cnt;
322
323 len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
324 while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
325 len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
326 slot_cnt += *slots_per_op;
327 }
328
329 if (len)
330 slot_cnt += *slots_per_op;
331
332 return slot_cnt;
333}
334
335static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
336 struct iop_adma_chan *chan)
337{
338 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
339
340 switch (chan->device->id) {
341 case DMA0_ID:
342 case DMA1_ID:
343 return hw_desc.dma->dest_addr;
344 case AAU_ID:
345 return hw_desc.aau->dest_addr;
346 default:
347 BUG();
348 }
349 return 0;
350}
351
352static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
353 struct iop_adma_chan *chan)
354{
355 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
356
357 switch (chan->device->id) {
358 case DMA0_ID:
359 case DMA1_ID:
360 return hw_desc.dma->byte_count;
361 case AAU_ID:
362 return hw_desc.aau->byte_count;
363 default:
364 BUG();
365 }
366 return 0;
367}
368
369/* translate the src_idx to a descriptor word index */
370static inline int __desc_idx(int src_idx)
371{
372	static const int desc_idx_table[] = { 0, 0, 0, 0,
373 0, 1, 2, 3,
374 5, 6, 7, 8,
375 9, 10, 11, 12,
376 14, 15, 16, 17,
377 18, 19, 20, 21,
378 23, 24, 25, 26,
379 27, 28, 29, 30,
380 };
381
382 return desc_idx_table[src_idx];
383}
384
385static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
386 struct iop_adma_chan *chan,
387 int src_idx)
388{
389 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
390
391 switch (chan->device->id) {
392 case DMA0_ID:
393 case DMA1_ID:
394 return hw_desc.dma->src_addr;
395 case AAU_ID:
396 break;
397 default:
398 BUG();
399 }
400
401 if (src_idx < 4)
402 return hw_desc.aau->src[src_idx];
403 else
404 return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
405}
406
407static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
408 int src_idx, dma_addr_t addr)
409{
410 if (src_idx < 4)
411 hw_desc->src[src_idx] = addr;
412 else
413 hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
414}
415
416static inline void
417iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
418{
419 struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
420 union {
421 u32 value;
422 struct iop3xx_dma_desc_ctrl field;
423 } u_desc_ctrl;
424
425 u_desc_ctrl.value = 0;
426 u_desc_ctrl.field.mem_to_mem_en = 1;
427 u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
428 u_desc_ctrl.field.int_en = int_en;
429 hw_desc->desc_ctrl = u_desc_ctrl.value;
430 hw_desc->upper_pci_src_addr = 0;
431 hw_desc->crc_addr = 0;
432}
433
434static inline void
435iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
436{
437 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
438 union {
439 u32 value;
440 struct iop3xx_aau_desc_ctrl field;
441 } u_desc_ctrl;
442
443 u_desc_ctrl.value = 0;
444 u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
445 u_desc_ctrl.field.dest_write_en = 1;
446 u_desc_ctrl.field.int_en = int_en;
447 hw_desc->desc_ctrl = u_desc_ctrl.value;
448}
449
450static inline u32
451iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
452{
453 int i, shift;
454 u32 edcr;
455 union {
456 u32 value;
457 struct iop3xx_aau_desc_ctrl field;
458 } u_desc_ctrl;
459
460 u_desc_ctrl.value = 0;
461 switch (src_cnt) {
462 case 25 ... 32:
463 u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
464 edcr = 0;
465 shift = 1;
466 for (i = 24; i < src_cnt; i++) {
467 edcr |= (1 << shift);
468 shift += 3;
469 }
470 hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
471 src_cnt = 24;
472 /* fall through */
473 case 17 ... 24:
474 if (!u_desc_ctrl.field.blk_ctrl) {
475 hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
476 u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
477 }
478 edcr = 0;
479 shift = 1;
480 for (i = 16; i < src_cnt; i++) {
481 edcr |= (1 << shift);
482 shift += 3;
483 }
484 hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
485 src_cnt = 16;
486 /* fall through */
487 case 9 ... 16:
488 if (!u_desc_ctrl.field.blk_ctrl)
489 u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
490 edcr = 0;
491 shift = 1;
492 for (i = 8; i < src_cnt; i++) {
493 edcr |= (1 << shift);
494 shift += 3;
495 }
496 hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
497 src_cnt = 8;
498 /* fall through */
499 case 2 ... 8:
500 shift = 1;
501 for (i = 0; i < src_cnt; i++) {
502 u_desc_ctrl.value |= (1 << shift);
503 shift += 3;
504 }
505
506 if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
507 u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
508 }
509
510 u_desc_ctrl.field.dest_write_en = 1;
511 u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
512 u_desc_ctrl.field.int_en = int_en;
513 hw_desc->desc_ctrl = u_desc_ctrl.value;
514
515 return u_desc_ctrl.value;
516}
517
518static inline void
519iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
520{
521 iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
522}
523
524/* return the number of operations */
525static inline int
526iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
527{
528 int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
529 struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
530 union {
531 u32 value;
532 struct iop3xx_aau_desc_ctrl field;
533 } u_desc_ctrl;
534 int i, j;
535
536 hw_desc = desc->hw_desc;
537
538 for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
539 i += slots_per_op, j++) {
540 iter = iop_hw_desc_slot_idx(hw_desc, i);
541 u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
542 u_desc_ctrl.field.dest_write_en = 0;
543 u_desc_ctrl.field.zero_result_en = 1;
544 u_desc_ctrl.field.int_en = int_en;
545 iter->desc_ctrl = u_desc_ctrl.value;
546
547 /* for the subsequent descriptors preserve the store queue
548 * and chain them together
549 */
550 if (i) {
551 prev_hw_desc =
552 iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
553 prev_hw_desc->next_desc =
554 (u32) (desc->async_tx.phys + (i << 5));
555 }
556 }
557
558 return j;
559}
560
561static inline void
562iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
563{
564 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
565 union {
566 u32 value;
567 struct iop3xx_aau_desc_ctrl field;
568 } u_desc_ctrl;
569
570 u_desc_ctrl.value = 0;
571 switch (src_cnt) {
572 case 25 ... 32:
573 u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
574 hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
575 /* fall through */
576 case 17 ... 24:
577 if (!u_desc_ctrl.field.blk_ctrl) {
578 hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
579 u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
580 }
581 hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
582 /* fall through */
583 case 9 ... 16:
584 if (!u_desc_ctrl.field.blk_ctrl)
585 u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
586 hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
587 /* fall through */
588 case 1 ... 8:
589 if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
590 u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
591 }
592
593 u_desc_ctrl.field.dest_write_en = 0;
594 u_desc_ctrl.field.int_en = int_en;
595 hw_desc->desc_ctrl = u_desc_ctrl.value;
596}
597
598static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
599 struct iop_adma_chan *chan,
600 u32 byte_count)
601{
602 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
603
604 switch (chan->device->id) {
605 case DMA0_ID:
606 case DMA1_ID:
607 hw_desc.dma->byte_count = byte_count;
608 break;
609 case AAU_ID:
610 hw_desc.aau->byte_count = byte_count;
611 break;
612 default:
613 BUG();
614 }
615}
616
617static inline void
618iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
619 struct iop_adma_chan *chan)
620{
621 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
622
623 switch (chan->device->id) {
624 case DMA0_ID:
625 case DMA1_ID:
626 iop_desc_init_memcpy(desc, 1);
627 hw_desc.dma->byte_count = 0;
628 hw_desc.dma->dest_addr = 0;
629 hw_desc.dma->src_addr = 0;
630 break;
631 case AAU_ID:
632 iop_desc_init_null_xor(desc, 2, 1);
633 hw_desc.aau->byte_count = 0;
634 hw_desc.aau->dest_addr = 0;
635 hw_desc.aau->src[0] = 0;
636 hw_desc.aau->src[1] = 0;
637 break;
638 default:
639 BUG();
640 }
641}
642
643static inline void
644iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
645{
646 int slots_per_op = desc->slots_per_op;
647 struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
648 int i = 0;
649
650 if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
651 hw_desc->byte_count = len;
652 } else {
653 do {
654 iter = iop_hw_desc_slot_idx(hw_desc, i);
655 iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
656 len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
657 i += slots_per_op;
658 } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
659
660 if (len) {
661 iter = iop_hw_desc_slot_idx(hw_desc, i);
662 iter->byte_count = len;
663 }
664 }
665}
666
667static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
668 struct iop_adma_chan *chan,
669 dma_addr_t addr)
670{
671 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
672
673 switch (chan->device->id) {
674 case DMA0_ID:
675 case DMA1_ID:
676 hw_desc.dma->dest_addr = addr;
677 break;
678 case AAU_ID:
679 hw_desc.aau->dest_addr = addr;
680 break;
681 default:
682 BUG();
683 }
684}
685
686static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
687 dma_addr_t addr)
688{
689 struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
690 hw_desc->src_addr = addr;
691}
692
693static inline void
694iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
695 dma_addr_t addr)
696{
697
698 struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
699 int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
700 int i;
701
702 for (i = 0; (slot_cnt -= slots_per_op) >= 0;
703 i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
704 iter = iop_hw_desc_slot_idx(hw_desc, i);
705 iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
706 }
707}
708
709static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
710 int src_idx, dma_addr_t addr)
711{
712
713 struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
714 int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
715 int i;
716
717 for (i = 0; (slot_cnt -= slots_per_op) >= 0;
718 i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
719 iter = iop_hw_desc_slot_idx(hw_desc, i);
720 iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
721 }
722}
723
724static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
725 u32 next_desc_addr)
726{
727 /* hw_desc->next_desc is the same location for all channels */
728 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
729 BUG_ON(hw_desc.dma->next_desc);
730 hw_desc.dma->next_desc = next_desc_addr;
731}
732
733static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
734{
735 /* hw_desc->next_desc is the same location for all channels */
736 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
737 return hw_desc.dma->next_desc;
738}
739
740static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
741{
742 /* hw_desc->next_desc is the same location for all channels */
743 union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
744 hw_desc.dma->next_desc = 0;
745}
746
747static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
748 u32 val)
749{
750 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
751 hw_desc->src[0] = val;
752}
753
754static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
755{
756 struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
757 struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
758
759 BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
760 return desc_ctrl.zero_result_err;
761}
762
763static inline void iop_chan_append(struct iop_adma_chan *chan)
764{
765 u32 dma_chan_ctrl;
766 /* workaround dropped interrupts on 3xx */
767 mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3));
768
769 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
770 dma_chan_ctrl |= 0x2;
771 __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
772}
773
774static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
775{
776 if (!busy)
777 del_timer(&chan->cleanup_watchdog);
778}
779
780static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
781{
782 return __raw_readl(DMA_CSR(chan));
783}
784
785static inline void iop_chan_disable(struct iop_adma_chan *chan)
786{
787 u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
788 dma_chan_ctrl &= ~1;
789 __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
790}
791
792static inline void iop_chan_enable(struct iop_adma_chan *chan)
793{
794 u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
795
796 dma_chan_ctrl |= 1;
797 __raw_writel(dma_chan_ctrl, DMA_CCR(chan));
798}
799
800static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
801{
802 u32 status = __raw_readl(DMA_CSR(chan));
803 status &= (1 << 9);
804 __raw_writel(status, DMA_CSR(chan));
805}
806
807static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
808{
809 u32 status = __raw_readl(DMA_CSR(chan));
810 status &= (1 << 8);
811 __raw_writel(status, DMA_CSR(chan));
812}
813
814static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
815{
816 u32 status = __raw_readl(DMA_CSR(chan));
817
818 switch (chan->device->id) {
819 case DMA0_ID:
820 case DMA1_ID:
821 status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
822 break;
823 case AAU_ID:
824 status &= (1 << 5);
825 break;
826 default:
827 BUG();
828 }
829
830 __raw_writel(status, DMA_CSR(chan));
831}
832
833static inline int
834iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
835{
836 return 0;
837}
838
839static inline int
840iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
841{
842 return 0;
843}
844
845static inline int
846iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
847{
848 return 0;
849}
850
851static inline int
852iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
853{
854 return test_bit(5, &status);
855}
856
857static inline int
858iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
859{
860 switch (chan->device->id) {
861 case DMA0_ID:
862 case DMA1_ID:
863 return test_bit(2, &status);
864 default:
865 return 0;
866 }
867}
868
869static inline int
870iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
871{
872 switch (chan->device->id) {
873 case DMA0_ID:
874 case DMA1_ID:
875 return test_bit(3, &status);
876 default:
877 return 0;
878 }
879}
880
881static inline int
882iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
883{
884 switch (chan->device->id) {
885 case DMA0_ID:
886 case DMA1_ID:
887 return test_bit(1, &status);
888 default:
889 return 0;
890 }
891}
892#endif /* _ADMA_H */
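
Two limits drive the slot accounting in this header: a descriptor's byte count tops out at 16 MB for memcpy/XOR but only 1 KB for zero-sum, and the number of 32-byte slots per descriptor comes from the source-count table in iop3xx_aau_xor_slot_count(). A condensed userspace restatement of iop_chan_zero_sum_slot_count() (equivalent arithmetic, not the verbatim kernel code) with one worked case:

#include <stdio.h>
#include <stddef.h>

/* slots per descriptor, indexed by source count (values from the header) */
static const int slot_count_table[33] = {
	0,
	1, 1, 1, 1,			/*  1 -  4 sources */
	2, 2, 2, 2,			/*  5 -  8 */
	4, 4, 4, 4, 4, 4, 4, 4,		/*  9 - 16 */
	8, 8, 8, 8, 8, 8, 8, 8,		/* 17 - 24 */
	8, 8, 8, 8, 8, 8, 8, 8,		/* 25 - 32 */
};

/* equivalent to iop_chan_zero_sum_slot_count(): one descriptor of
 * slot_count_table[src_cnt] slots per 1 KB chunk of the buffer
 */
static int zero_sum_slot_count(size_t len, int src_cnt)
{
	int slots_per_op = slot_count_table[src_cnt];
	int slot_cnt = slots_per_op;

	while (len > 1024) {	/* IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT */
		len -= 1024;
		slot_cnt += slots_per_op;
	}
	return slot_cnt;
}

int main(void)
{
	/* 4 KB zero-sum over 5 sources: 4 chained descriptors of 2 slots
	 * each -> 8 slots of 32 bytes
	 */
	printf("%d\n", zero_sum_slot_count(4096, 5));	/* prints 8 */
	return 0;
}
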
diff --git a/include/asm-arm/hardware/iop3xx.h b/include/asm-arm/hardware/iop3xx.h
index 63feceb7ede5..81ca5d3e2bff 100644
--- a/include/asm-arm/hardware/iop3xx.h
+++ b/include/asm-arm/hardware/iop3xx.h
@@ -144,24 +144,9 @@ extern int init_atu;
144#define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380) 144#define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380)
145 145
146/* DMA Controller */ 146/* DMA Controller */
147#define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400) 147#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \
148#define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404) 148 (0x400 + (chan << 6)))
149#define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c) 149#define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27)
150#define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410)
151#define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414)
152#define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418)
153#define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c)
154#define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420)
155#define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424)
156#define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440)
157#define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444)
158#define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c)
159#define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450)
160#define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454)
161#define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458)
162#define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c)
163#define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460)
164#define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464)
165 150
166/* Peripheral bus interface */ 151/* Peripheral bus interface */
167#define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680) 152#define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680)
@@ -210,48 +195,8 @@ extern int init_atu;
210#define IOP_TMR_RATIO_1_1 0x00 195#define IOP_TMR_RATIO_1_1 0x00
211 196
212/* Application accelerator unit */ 197/* Application accelerator unit */
213#define IOP3XX_AAU_ACR (volatile u32 *)IOP3XX_REG_ADDR(0x0800) 198#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800)
214#define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804) 199#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7)
215#define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808)
216#define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c)
217#define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810)
218#define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814)
219#define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818)
220#define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c)
221#define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820)
222#define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824)
223#define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828)
224#define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c)
225#define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830)
226#define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834)
227#define IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838)
228#define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c)
229#define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840)
230#define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844)
231#define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848)
232#define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c)
233#define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850)
234#define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854)
235#define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858)
236#define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c)
237#define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860)
238#define IOP3XX_AAU_SAR17 (volatile u32 *)IOP3XX_REG_ADDR(0x0864)
239#define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868)
240#define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c)
241#define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870)
242#define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874)
243#define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878)
244#define IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c)
245#define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880)
246#define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884)
247#define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888)
248#define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c)
249#define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890)
250#define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894)
251#define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898)
252#define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c)
253#define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0)
254#define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4)
255 200
256/* I2C bus interface unit */ 201/* I2C bus interface unit */
257#define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680) 202#define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680)
@@ -329,6 +274,9 @@ static inline void write_tisr(u32 val)
329 asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val)); 274 asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val));
330} 275}
331 276
277extern struct platform_device iop3xx_dma_0_channel;
278extern struct platform_device iop3xx_dma_1_channel;
279extern struct platform_device iop3xx_aau_channel;
332extern struct platform_device iop3xx_i2c0_device; 280extern struct platform_device iop3xx_i2c0_device;
333extern struct platform_device iop3xx_i2c1_device; 281extern struct platform_device iop3xx_i2c1_device;
334 282
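
With the per-register defines gone, the two base macros are all the platform code needs: it hands the iop-adma driver one memory resource per unit and the driver ioremaps it, reaching individual registers through the DMA_*/AAU_* offsets in iop3xx-adma.h. A minimal sketch of such a resource built from the new macros; the array name is illustrative, and the real definitions live in arch/arm/plat-iop/adma.c, which is not shown in this excerpt:

#include <linux/ioport.h>
#include <asm/hardware/iop3xx.h>

/* DMA channel 1 sits at peripheral base + 0x400 + (1 << 6) = +0x440, the
 * same window the removed IOP3XX_DMA1_CCR..IOP3XX_DMA1_DCR defines covered;
 * IOP3XX_DMA_UPPER_PA() ends the region at +0x27 (DCR, the last register,
 * is at offset 0x24).
 */
static struct resource iop3xx_dma_1_resources[] = {
	[0] = {
		.start	= IOP3XX_DMA_PHYS_BASE(1),
		.end	= IOP3XX_DMA_UPPER_PA(1),
		.flags	= IORESOURCE_MEM,
	},
};
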
diff --git a/include/asm-arm/hardware/iop_adma.h b/include/asm-arm/hardware/iop_adma.h
new file mode 100644
index 000000000000..ca8e71f44346
--- /dev/null
+++ b/include/asm-arm/hardware/iop_adma.h
@@ -0,0 +1,118 @@
1/*
2 * Copyright © 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 */
18#ifndef IOP_ADMA_H
19#define IOP_ADMA_H
20#include <linux/types.h>
21#include <linux/dmaengine.h>
22#include <linux/interrupt.h>
23
24#define IOP_ADMA_SLOT_SIZE 32
25#define IOP_ADMA_THRESHOLD 4
26
27/**
28 * struct iop_adma_device - internal representation of an ADMA device
29 * @pdev: Platform device
30 * @id: HW ADMA Device selector
31 * @dma_desc_pool: base of DMA descriptor region (DMA address)
32 * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
33 * @common: embedded struct dma_device
34 */
35struct iop_adma_device {
36 struct platform_device *pdev;
37 int id;
38 dma_addr_t dma_desc_pool;
39 void *dma_desc_pool_virt;
40 struct dma_device common;
41};
42
43/**
44 * struct iop_adma_chan - internal representation of an ADMA channel
45 * @pending: allows batching of hardware operations
46 * @completed_cookie: identifier for the most recently completed operation
47 * @lock: serializes enqueue/dequeue operations to the slot pool
48 * @mmr_base: memory mapped register base
49 * @chain: device chain view of the descriptors
50 * @device: parent device
51 * @common: common dmaengine channel object members
52 * @last_used: place holder for allocation to continue from where it left off
53 * @all_slots: complete domain of slots usable by the channel
54 * @cleanup_watchdog: workaround missed interrupts on iop3xx
55 * @slots_allocated: records the actual size of the descriptor slot pool
56 * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
57 */
58struct iop_adma_chan {
59 int pending;
60 dma_cookie_t completed_cookie;
61 spinlock_t lock; /* protects the descriptor slot pool */
62 void __iomem *mmr_base;
63 struct list_head chain;
64 struct iop_adma_device *device;
65 struct dma_chan common;
66 struct iop_adma_desc_slot *last_used;
67 struct list_head all_slots;
68 struct timer_list cleanup_watchdog;
69 int slots_allocated;
70 struct tasklet_struct irq_tasklet;
71};
72
73/**
74 * struct iop_adma_desc_slot - IOP-ADMA software descriptor
75 * @slot_node: node on the iop_adma_chan.all_slots list
76 * @chain_node: node on the iop_adma_chan.chain list
77 * @hw_desc: virtual address of the hardware descriptor chain
78 * @phys: hardware address of the hardware descriptor chain
79 * @group_head: first operation in a transaction
80 * @slot_cnt: total slots used in a transaction (group of operations)
81 * @slots_per_op: number of slots per operation
82 * @idx: pool index
83 * @unmap_src_cnt: number of xor sources
84 * @unmap_len: transaction bytecount
85 * @async_tx: support for the async_tx api
86 * @group_list: list of slots that make up a multi-descriptor transaction
87 * for example transfer lengths larger than the supported hw max
88 * @xor_check_result: result of zero sum
89 * @crc32_result: result of the crc calculation
90 */
91struct iop_adma_desc_slot {
92 struct list_head slot_node;
93 struct list_head chain_node;
94 void *hw_desc;
95 struct iop_adma_desc_slot *group_head;
96 u16 slot_cnt;
97 u16 slots_per_op;
98 u16 idx;
99 u16 unmap_src_cnt;
100 size_t unmap_len;
101 struct dma_async_tx_descriptor async_tx;
102 union {
103 u32 *xor_check_result;
104 u32 *crc32_result;
105 };
106};
107
108struct iop_adma_platform_data {
109 int hw_id;
110 dma_cap_mask_t cap_mask;
111 size_t pool_size;
112};
113
114#define to_iop_sw_desc(addr_hw_desc) \
115 container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
116#define iop_hw_desc_slot_idx(hw_desc, idx) \
117 ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
118#endif
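
iop_adma_platform_data is the only piece of this header a board or plat file fills in directly: it selects which hardware unit (hw_id) the registered platform device maps to and how large a slot pool the driver carves into IOP_ADMA_SLOT_SIZE (32-byte) descriptors, which is also why iop_hw_desc_slot_idx() steps through the pool with (idx) << 5. A hedged sketch of such platform data; the structure is defined above, but the instance name and values only approximate what arch/arm/plat-iop/adma.c (not shown here) registers:

#include <asm/page.h>
#include <asm/hardware/iop_adma.h>

/* One descriptor pool page yields PAGE_SIZE / IOP_ADMA_SLOT_SIZE = 128
 * 32-byte slots on a 4K-page system; cap_mask is filled in at init time
 * with the operations (memcpy, xor, ...) the unit supports.
 */
static struct iop_adma_platform_data iop3xx_dma_0_data = {
	.hw_id		= 0,		/* DMA0_ID in iop3xx-adma.h */
	.pool_size	= PAGE_SIZE,
};
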
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
new file mode 100644
index 000000000000..ff1255079fa1
--- /dev/null
+++ b/include/linux/async_tx.h
@@ -0,0 +1,156 @@
1/*
2 * Copyright © 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 */
18#ifndef _ASYNC_TX_H_
19#define _ASYNC_TX_H_
20#include <linux/dmaengine.h>
21#include <linux/spinlock.h>
22#include <linux/interrupt.h>
23
24/**
25 * dma_chan_ref - object used to manage dma channels received from the
26 * dmaengine core.
27 * @chan - the channel being tracked
28 * @node - node for the channel to be placed on async_tx_master_list
29 * @rcu - for list_del_rcu
30 * @count - number of times this channel is listed in the pool
31 * (for channels with multiple capabilities)
32 */
33struct dma_chan_ref {
34 struct dma_chan *chan;
35 struct list_head node;
36 struct rcu_head rcu;
37 atomic_t count;
38};
39
40/**
41 * async_tx_flags - modifiers for the async_* calls
42 * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where
43 * the destination address is not a source. The asynchronous case handles this
44 * implicitly, the synchronous case needs to zero the destination block.
45 * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is
46 * also one of the source addresses. In the synchronous case the destination
47 * address is an implied source, whereas in the asynchronous case it must be listed
48 * as a source. The destination address must be the first address in the source
49 * array.
50 * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
51 * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
52 * dependency chain
53 * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining.
54 * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
55 * take an atomic mapping (KM_USER0) on the source page(s)
56 * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
57 * take an atomic mapping (KM_USER0) on the dest page(s)
58 */
59enum async_tx_flags {
60 ASYNC_TX_XOR_ZERO_DST = (1 << 0),
61 ASYNC_TX_XOR_DROP_DST = (1 << 1),
62 ASYNC_TX_ASSUME_COHERENT = (1 << 2),
63 ASYNC_TX_ACK = (1 << 3),
64 ASYNC_TX_DEP_ACK = (1 << 4),
65 ASYNC_TX_KMAP_SRC = (1 << 5),
66 ASYNC_TX_KMAP_DST = (1 << 6),
67};
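To make the two xor destination flags concrete, a hedged sketch (the pages, source counts, and STRIPE_SIZE are placeholders) of the two common cases described above:

    /* parity carries no prior data: the sync path may zero it first */
    tx = async_xor(parity, data_blocks, 0, count, STRIPE_SIZE,
                   ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK, NULL, NULL, NULL);

    /* parity is both destination and source (read-modify-write prexor):
     * it must also appear first in the source array
     */
    srcs[0] = parity;
    srcs[1] = old_data;
    tx = async_xor(parity, srcs, 0, 2, STRIPE_SIZE,
                   ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);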
68
69#ifdef CONFIG_DMA_ENGINE
70void async_tx_issue_pending_all(void);
71enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
72void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
73struct dma_chan *
74async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
75 enum dma_transaction_type tx_type);
76#else
77static inline void async_tx_issue_pending_all(void)
78{
79 do { } while (0);
80}
81
82static inline enum dma_status
83dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
84{
85 return DMA_SUCCESS;
86}
87
88static inline void
89async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
91{
92 do { } while (0);
93}
94
95static inline struct dma_chan *
96async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
97 enum dma_transaction_type tx_type)
98{
99 return NULL;
100}
101#endif
102
103/**
104 * async_tx_sync_epilog - actions to take if an operation is run synchronously
105 * @flags: async_tx flags
106 * @depend_tx: transaction depends on depend_tx
107 * @cb_fn: function to call when the transaction completes
108 * @cb_fn_param: parameter to pass to the callback routine
109 */
110static inline void
111async_tx_sync_epilog(unsigned long flags,
112 struct dma_async_tx_descriptor *depend_tx,
113 dma_async_tx_callback cb_fn, void *cb_fn_param)
114{
115 if (cb_fn)
116 cb_fn(cb_fn_param);
117
118 if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
119 async_tx_ack(depend_tx);
120}
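For context, a hedged sketch of the synchronous fallback that ends in this epilog, loosely in the shape of the async_memcpy implementation in this series; page_address() stands in for the flag-driven kmap handling, and the variable names are assumptions:

    if (!chan) {
            void *d = page_address(dest) + dest_offset;
            void *s = page_address(src) + src_offset;

            /* honor any ordering constraint before touching the data */
            if (depend_tx)
                    dma_wait_for_async_tx(depend_tx);

            memcpy(d, s, len);

            /* run the callback and ack the dependency, as above */
            async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_fn_param);
            return NULL;    /* no descriptor for a synchronous operation */
    }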
121
122void
123async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
124 enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
125 dma_async_tx_callback cb_fn, void *cb_fn_param);
126
127struct dma_async_tx_descriptor *
128async_xor(struct page *dest, struct page **src_list, unsigned int offset,
129 int src_cnt, size_t len, enum async_tx_flags flags,
130 struct dma_async_tx_descriptor *depend_tx,
131 dma_async_tx_callback cb_fn, void *cb_fn_param);
132
133struct dma_async_tx_descriptor *
134async_xor_zero_sum(struct page *dest, struct page **src_list,
135 unsigned int offset, int src_cnt, size_t len,
136 u32 *result, enum async_tx_flags flags,
137 struct dma_async_tx_descriptor *depend_tx,
138 dma_async_tx_callback cb_fn, void *cb_fn_param);
139
140struct dma_async_tx_descriptor *
141async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
142 unsigned int src_offset, size_t len, enum async_tx_flags flags,
143 struct dma_async_tx_descriptor *depend_tx,
144 dma_async_tx_callback cb_fn, void *cb_fn_param);
145
146struct dma_async_tx_descriptor *
147async_memset(struct page *dest, int val, unsigned int offset,
148 size_t len, enum async_tx_flags flags,
149 struct dma_async_tx_descriptor *depend_tx,
150 dma_async_tx_callback cb_fn, void *cb_fn_param);
151
152struct dma_async_tx_descriptor *
153async_trigger_callback(enum async_tx_flags flags,
154 struct dma_async_tx_descriptor *depend_tx,
155 dma_async_tx_callback cb_fn, void *cb_fn_param);
156#endif /* _ASYNC_TX_H_ */
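A hedged end-to-end usage sketch of the interface declared above: copy, then xor, then a completion callback, with each stage ordered after its predecessor via depend_tx and acking it with ASYNC_TX_DEP_ACK (the pages, counts, and callback are placeholders):

    struct dma_async_tx_descriptor *tx;

    tx = async_memcpy(data, bio_page, 0, 0, PAGE_SIZE, 0,
                      NULL, NULL, NULL);

    /* parity has no prior contents, hence ASYNC_TX_XOR_ZERO_DST */
    tx = async_xor(parity, blocks, 0, count, PAGE_SIZE,
                   ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_DEP_ACK,
                   tx, NULL, NULL);

    /* run complete_cb(ctx) once the whole chain has finished */
    async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
                           complete_cb, ctx);

    async_tx_issue_pending_all();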
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index c94d8f1d62e5..a3b6035b6c86 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -21,29 +21,40 @@
21#ifndef DMAENGINE_H 21#ifndef DMAENGINE_H
22#define DMAENGINE_H 22#define DMAENGINE_H
23 23
24#ifdef CONFIG_DMA_ENGINE
25
26#include <linux/device.h> 24#include <linux/device.h>
27#include <linux/uio.h> 25#include <linux/uio.h>
28#include <linux/kref.h> 26#include <linux/kref.h>
29#include <linux/completion.h> 27#include <linux/completion.h>
30#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/dma-mapping.h>
31 30
32/** 31/**
33 * enum dma_event - resource PNP/power management events 32 * enum dma_state - resource PNP/power management state
34 * @DMA_RESOURCE_SUSPEND: DMA device going into low power state 33 * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
35 * @DMA_RESOURCE_RESUME: DMA device returning to full power 34 * @DMA_RESOURCE_RESUME: DMA device returning to full power
36 * @DMA_RESOURCE_ADDED: DMA device added to the system 35 * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
37 * @DMA_RESOURCE_REMOVED: DMA device removed from the system 36 * @DMA_RESOURCE_REMOVED: DMA device removed from the system
38 */ 37 */
39enum dma_event { 38enum dma_state {
40 DMA_RESOURCE_SUSPEND, 39 DMA_RESOURCE_SUSPEND,
41 DMA_RESOURCE_RESUME, 40 DMA_RESOURCE_RESUME,
42 DMA_RESOURCE_ADDED, 41 DMA_RESOURCE_AVAILABLE,
43 DMA_RESOURCE_REMOVED, 42 DMA_RESOURCE_REMOVED,
44}; 43};
45 44
46/** 45/**
46 * enum dma_state_client - state of the channel in the client
47 * @DMA_ACK: client would like to use, or was using this channel
48 * @DMA_DUP: client has already seen this channel, or is not using this channel
49 * @DMA_NAK: client does not want to see any more channels
50 */
51enum dma_state_client {
52 DMA_ACK,
53 DMA_DUP,
54 DMA_NAK,
55};
56
57/**
47 * typedef dma_cookie_t - an opaque DMA cookie 58 * typedef dma_cookie_t - an opaque DMA cookie
48 * 59 *
49 * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code 60 * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
@@ -65,6 +76,31 @@ enum dma_status {
65}; 76};
66 77
67/** 78/**
79 * enum dma_transaction_type - DMA transaction types/indexes
80 */
81enum dma_transaction_type {
82 DMA_MEMCPY,
83 DMA_XOR,
84 DMA_PQ_XOR,
85 DMA_DUAL_XOR,
86 DMA_PQ_UPDATE,
87 DMA_ZERO_SUM,
88 DMA_PQ_ZERO_SUM,
89 DMA_MEMSET,
90 DMA_MEMCPY_CRC32C,
91 DMA_INTERRUPT,
92};
93
94/* last transaction type for creation of the capabilities mask */
95#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
96
97/**
98 * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
99 * See linux/cpumask.h
100 */
101typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
102
103/**
68 * struct dma_chan_percpu - the per-CPU part of struct dma_chan 104 * struct dma_chan_percpu - the per-CPU part of struct dma_chan
69 * @refcount: local_t used for open-coded "bigref" counting 105 * @refcount: local_t used for open-coded "bigref" counting
70 * @memcpy_count: transaction counter 106 * @memcpy_count: transaction counter
@@ -80,7 +116,6 @@ struct dma_chan_percpu {
80 116
81/** 117/**
82 * struct dma_chan - devices supply DMA channels, clients use them 118 * struct dma_chan - devices supply DMA channels, clients use them
83 * @client: ptr to the client user of this chan, will be %NULL when unused
84 * @device: ptr to the dma device who supplies this channel, always !%NULL 119 * @device: ptr to the dma device who supplies this channel, always !%NULL
85 * @cookie: last cookie value returned to client 120 * @cookie: last cookie value returned to client
86 * @chan_id: channel ID for sysfs 121 * @chan_id: channel ID for sysfs
@@ -88,12 +123,10 @@ struct dma_chan_percpu {
88 * @refcount: kref, used in "bigref" slow-mode 123 * @refcount: kref, used in "bigref" slow-mode
89 * @slow_ref: indicates that the DMA channel is free 124 * @slow_ref: indicates that the DMA channel is free
90 * @rcu: the DMA channel's RCU head 125 * @rcu: the DMA channel's RCU head
91 * @client_node: used to add this to the client chan list
92 * @device_node: used to add this to the device chan list 126 * @device_node: used to add this to the device chan list
93 * @local: per-cpu pointer to a struct dma_chan_percpu 127 * @local: per-cpu pointer to a struct dma_chan_percpu
94 */ 128 */
95struct dma_chan { 129struct dma_chan {
96 struct dma_client *client;
97 struct dma_device *device; 130 struct dma_device *device;
98 dma_cookie_t cookie; 131 dma_cookie_t cookie;
99 132
@@ -105,11 +138,11 @@ struct dma_chan {
105 int slow_ref; 138 int slow_ref;
106 struct rcu_head rcu; 139 struct rcu_head rcu;
107 140
108 struct list_head client_node;
109 struct list_head device_node; 141 struct list_head device_node;
110 struct dma_chan_percpu *local; 142 struct dma_chan_percpu *local;
111}; 143};
112 144
145
113void dma_chan_cleanup(struct kref *kref); 146void dma_chan_cleanup(struct kref *kref);
114 147
115static inline void dma_chan_get(struct dma_chan *chan) 148static inline void dma_chan_get(struct dma_chan *chan)
@@ -134,169 +167,206 @@ static inline void dma_chan_put(struct dma_chan *chan)
134 167
135/* 168/*
136 * typedef dma_event_callback - function pointer to a DMA event callback 169 * typedef dma_event_callback - function pointer to a DMA event callback
170 * For each channel added to the system this routine is called for each client.
171 * If the client would like to use the channel it returns '1' to signal (ack)
172 * the dmaengine core to take out a reference on the channel and its
173 * corresponding device. A client must not 'ack' an available channel more
174 * than once. When a channel is removed all clients are notified. If a client
175 * is using the channel it must 'ack' the removal. A client must not 'ack' a
176 * removed channel more than once.
177 * @client - 'this' pointer for the client context
178 * @chan - channel to be acted upon
179 * @state - available or removed
137 */ 180 */
138typedef void (*dma_event_callback) (struct dma_client *client, 181struct dma_client;
139 struct dma_chan *chan, enum dma_event event); 182typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
183 struct dma_chan *chan, enum dma_state state);
140 184
141/** 185/**
142 * struct dma_client - info on the entity making use of DMA services 186 * struct dma_client - info on the entity making use of DMA services
143 * @event_callback: func ptr to call when something happens 187 * @event_callback: func ptr to call when something happens
144 * @chan_count: number of chans allocated 188 * @cap_mask: only return channels that satisfy the requested capabilities
145 * @chans_desired: number of chans requested. Can be +/- chan_count 189 * a value of zero corresponds to any capability
146 * @lock: protects access to the channels list
147 * @channels: the list of DMA channels allocated
148 * @global_node: list_head for global dma_client_list 190 * @global_node: list_head for global dma_client_list
149 */ 191 */
150struct dma_client { 192struct dma_client {
151 dma_event_callback event_callback; 193 dma_event_callback event_callback;
152 unsigned int chan_count; 194 dma_cap_mask_t cap_mask;
153 unsigned int chans_desired;
154
155 spinlock_t lock;
156 struct list_head channels;
157 struct list_head global_node; 195 struct list_head global_node;
158}; 196};
159 197
198typedef void (*dma_async_tx_callback)(void *dma_async_param);
199/**
200 * struct dma_async_tx_descriptor - async transaction descriptor
201 * ---dma generic offload fields---
202 * @cookie: tracking cookie for this transaction, set to -EBUSY if
203 * this tx is sitting on a dependency list
204 * @ack: the descriptor can not be reused until the client acknowledges
205 * receipt, i.e. has had a chance to establish any dependency chains
206 * @phys: physical address of the descriptor
207 * @tx_list: driver common field for operations that require multiple
208 * descriptors
209 * @chan: target channel for this operation
210 * @tx_submit: set the prepared descriptor(s) to be executed by the engine
211 * @tx_set_dest: set a destination address in a hardware descriptor
212 * @tx_set_src: set a source address in a hardware descriptor
213 * @callback: routine to call after this operation is complete
214 * @callback_param: general parameter to pass to the callback routine
215 * ---async_tx api specific fields---
216 * @depend_list: at completion this list of transactions is submitted
217 * @depend_node: allows this transaction to be executed after another
218 * transaction has completed, possibly on another channel
219 * @parent: pointer to the next level up in the dependency chain
220 * @lock: protect the dependency list
221 */
222struct dma_async_tx_descriptor {
223 dma_cookie_t cookie;
224 int ack;
225 dma_addr_t phys;
226 struct list_head tx_list;
227 struct dma_chan *chan;
228 dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
229 void (*tx_set_dest)(dma_addr_t addr,
230 struct dma_async_tx_descriptor *tx, int index);
231 void (*tx_set_src)(dma_addr_t addr,
232 struct dma_async_tx_descriptor *tx, int index);
233 dma_async_tx_callback callback;
234 void *callback_param;
235 struct list_head depend_list;
236 struct list_head depend_node;
237 struct dma_async_tx_descriptor *parent;
238 spinlock_t lock;
239};
240
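Read together, these fields imply the prep/set/submit life cycle sketched below; the channel, cookie, and bus addresses are assumed to exist, and error handling is omitted:

    struct dma_async_tx_descriptor *tx;
    dma_cookie_t cookie;

    tx = chan->device->device_prep_dma_memcpy(chan, len, 0 /* int_en */);
    if (tx) {
            tx->tx_set_src(dma_src, tx, 0);
            tx->tx_set_dest(dma_dest, tx, 0);
            tx->callback = done_cb;          /* optional completion hook */
            tx->callback_param = done_arg;
            cookie = tx->tx_submit(tx);      /* append to the channel */
            dma_async_issue_pending(chan);   /* kick the hardware */
    }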
160/** 241/**
161 * struct dma_device - info on the entity supplying DMA services 242 * struct dma_device - info on the entity supplying DMA services
162 * @chancnt: how many DMA channels are supported 243 * @chancnt: how many DMA channels are supported
163 * @channels: the list of struct dma_chan 244 * @channels: the list of struct dma_chan
164 * @global_node: list_head for global dma_device_list 245 * @global_node: list_head for global dma_device_list
246 * @cap_mask: one or more dma_capability flags
247 * @max_xor: maximum number of xor sources, 0 if no capability
165 * @refcount: reference count 248 * @refcount: reference count
166 * @done: IO completion struct 249 * @done: IO completion struct
167 * @dev_id: unique device ID 250 * @dev_id: unique device ID
251 * @dev: struct device reference for dma mapping api
168 * @device_alloc_chan_resources: allocate resources and return the 252 * @device_alloc_chan_resources: allocate resources and return the
169 * number of allocated descriptors 253 * number of allocated descriptors
170 * @device_free_chan_resources: release DMA channel's resources 254 * @device_free_chan_resources: release DMA channel's resources
171 * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer 255 * @device_prep_dma_memcpy: prepares a memcpy operation
172 * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page 256 * @device_prep_dma_xor: prepares a xor operation
173 * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset 257 * @device_prep_dma_zero_sum: prepares a zero_sum operation
174 * @device_memcpy_complete: poll the status of an IOAT DMA transaction 258 * @device_prep_dma_memset: prepares a memset operation
175 * @device_memcpy_issue_pending: push appended descriptors to hardware 259 * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
260 * @device_dependency_added: async_tx notifies the channel about new deps
261 * @device_issue_pending: push pending transactions to hardware
176 */ 262 */
177struct dma_device { 263struct dma_device {
178 264
179 unsigned int chancnt; 265 unsigned int chancnt;
180 struct list_head channels; 266 struct list_head channels;
181 struct list_head global_node; 267 struct list_head global_node;
268 dma_cap_mask_t cap_mask;
269 int max_xor;
182 270
183 struct kref refcount; 271 struct kref refcount;
184 struct completion done; 272 struct completion done;
185 273
186 int dev_id; 274 int dev_id;
275 struct device *dev;
187 276
188 int (*device_alloc_chan_resources)(struct dma_chan *chan); 277 int (*device_alloc_chan_resources)(struct dma_chan *chan);
189 void (*device_free_chan_resources)(struct dma_chan *chan); 278 void (*device_free_chan_resources)(struct dma_chan *chan);
190 dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, 279
191 void *dest, void *src, size_t len); 280 struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
192 dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, 281 struct dma_chan *chan, size_t len, int int_en);
193 struct page *page, unsigned int offset, void *kdata, 282 struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
194 size_t len); 283 struct dma_chan *chan, unsigned int src_cnt, size_t len,
195 dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, 284 int int_en);
196 struct page *dest_pg, unsigned int dest_off, 285 struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
197 struct page *src_pg, unsigned int src_off, size_t len); 286 struct dma_chan *chan, unsigned int src_cnt, size_t len,
198 enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, 287 u32 *result, int int_en);
288 struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
289 struct dma_chan *chan, int value, size_t len, int int_en);
290 struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
291 struct dma_chan *chan);
292
293 void (*device_dependency_added)(struct dma_chan *chan);
294 enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
199 dma_cookie_t cookie, dma_cookie_t *last, 295 dma_cookie_t cookie, dma_cookie_t *last,
200 dma_cookie_t *used); 296 dma_cookie_t *used);
201 void (*device_memcpy_issue_pending)(struct dma_chan *chan); 297 void (*device_issue_pending)(struct dma_chan *chan);
202}; 298};
203 299
204/* --- public DMA engine API --- */ 300/* --- public DMA engine API --- */
205 301
206struct dma_client *dma_async_client_register(dma_event_callback event_callback); 302void dma_async_client_register(struct dma_client *client);
207void dma_async_client_unregister(struct dma_client *client); 303void dma_async_client_unregister(struct dma_client *client);
208void dma_async_client_chan_request(struct dma_client *client, 304void dma_async_client_chan_request(struct dma_client *client);
209 unsigned int number); 305dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
306 void *dest, void *src, size_t len);
307dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
308 struct page *page, unsigned int offset, void *kdata, size_t len);
309dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
310 struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
311 unsigned int src_off, size_t len);
312void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
313 struct dma_chan *chan);
210 314
211/** 315static inline void
212 * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses 316async_tx_ack(struct dma_async_tx_descriptor *tx)
213 * @chan: DMA channel to offload copy to
214 * @dest: destination address (virtual)
215 * @src: source address (virtual)
216 * @len: length
217 *
218 * Both @dest and @src must be mappable to a bus address according to the
219 * DMA mapping API rules for streaming mappings.
220 * Both @dest and @src must stay memory resident (kernel memory or locked
221 * user space pages).
222 */
223static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
224 void *dest, void *src, size_t len)
225{ 317{
226 int cpu = get_cpu(); 318 tx->ack = 1;
227 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
228 per_cpu_ptr(chan->local, cpu)->memcpy_count++;
229 put_cpu();
230
231 return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len);
232} 319}
233 320
234/** 321#define first_dma_cap(mask) __first_dma_cap(&(mask))
235 * dma_async_memcpy_buf_to_pg - offloaded copy from address to page 322static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
236 * @chan: DMA channel to offload copy to
237 * @page: destination page
238 * @offset: offset in page to copy to
239 * @kdata: source address (virtual)
240 * @len: length
241 *
242 * Both @page/@offset and @kdata must be mappable to a bus address according
243 * to the DMA mapping API rules for streaming mappings.
244 * Both @page/@offset and @kdata must stay memory resident (kernel memory or
245 * locked user space pages)
246 */
247static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
248 struct page *page, unsigned int offset, void *kdata, size_t len)
249{ 323{
250 int cpu = get_cpu(); 324 return min_t(int, DMA_TX_TYPE_END,
251 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 325 find_first_bit(srcp->bits, DMA_TX_TYPE_END));
252 per_cpu_ptr(chan->local, cpu)->memcpy_count++; 326}
253 put_cpu();
254 327
255 return chan->device->device_memcpy_buf_to_pg(chan, page, offset, 328#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
256 kdata, len); 329static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
330{
331 return min_t(int, DMA_TX_TYPE_END,
332 find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
257} 333}
258 334
259/** 335#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
260 * dma_async_memcpy_pg_to_pg - offloaded copy from page to page 336static inline void
261 * @chan: DMA channel to offload copy to 337__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
262 * @dest_pg: destination page
263 * @dest_off: offset in page to copy to
264 * @src_pg: source page
265 * @src_off: offset in page to copy from
266 * @len: length
267 *
268 * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
269 * address according to the DMA mapping API rules for streaming mappings.
270 * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
271 * (kernel memory or locked user space pages).
272 */
273static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
274 struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
275 unsigned int src_off, size_t len)
276{ 338{
277 int cpu = get_cpu(); 339 set_bit(tx_type, dstp->bits);
278 per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 340}
279 per_cpu_ptr(chan->local, cpu)->memcpy_count++;
280 put_cpu();
281 341
282 return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, 342#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
283 src_pg, src_off, len); 343static inline int
344__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
345{
346 return test_bit(tx_type, srcp->bits);
284} 347}
285 348
349#define for_each_dma_cap_mask(cap, mask) \
350 for ((cap) = first_dma_cap(mask); \
351 (cap) < DMA_TX_TYPE_END; \
352 (cap) = next_dma_cap((cap), (mask)))
353
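A small, hedged illustration of these helpers (the client and device pointers are hypothetical): a client seeds its mask before registering, and a loop can walk whatever a device advertises:

    enum dma_transaction_type cap;

    dma_cap_set(DMA_MEMCPY, client->cap_mask);      /* what we need */
    dma_cap_set(DMA_XOR, client->cap_mask);

    for_each_dma_cap_mask(cap, device->cap_mask)    /* what it offers */
            if (dma_has_cap(cap, client->cap_mask))
                    printk(KERN_DEBUG "capability %d matched\n", cap);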
286/** 354/**
287 * dma_async_memcpy_issue_pending - flush pending copies to HW 355 * dma_async_issue_pending - flush pending transactions to HW
288 * @chan: target DMA channel 356 * @chan: target DMA channel
289 * 357 *
290 * This allows drivers to push copies to HW in batches, 358 * This allows drivers to push copies to HW in batches,
291 * reducing MMIO writes where possible. 359 * reducing MMIO writes where possible.
292 */ 360 */
293static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) 361static inline void dma_async_issue_pending(struct dma_chan *chan)
294{ 362{
295 return chan->device->device_memcpy_issue_pending(chan); 363 return chan->device->device_issue_pending(chan);
296} 364}
297 365
366#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
367
298/** 368/**
299 * dma_async_memcpy_complete - poll for transaction completion 369 * dma_async_is_tx_complete - poll for transaction completion
300 * @chan: DMA channel 370 * @chan: DMA channel
301 * @cookie: transaction identifier to check status of 371 * @cookie: transaction identifier to check status of
302 * @last: returns last completed cookie, can be NULL 372 * @last: returns last completed cookie, can be NULL
@@ -306,12 +376,15 @@ static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
306 * internal state and can be used with dma_async_is_complete() to check 376 * internal state and can be used with dma_async_is_complete() to check
307 * the status of multiple cookies without re-checking hardware state. 377 * the status of multiple cookies without re-checking hardware state.
308 */ 378 */
309static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, 379static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
310 dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) 380 dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
311{ 381{
312 return chan->device->device_memcpy_complete(chan, cookie, last, used); 382 return chan->device->device_is_tx_complete(chan, cookie, last, used);
313} 383}
314 384
385#define dma_async_memcpy_complete(chan, cookie, last, used)\
386 dma_async_is_tx_complete(chan, cookie, last, used)
387
315/** 388/**
316 * dma_async_is_complete - test a cookie against chan state 389 * dma_async_is_complete - test a cookie against chan state
317 * @cookie: transaction identifier to test status of 390 * @cookie: transaction identifier to test status of
@@ -334,6 +407,7 @@ static inline enum dma_status dma_async_is_complete(dma_cookie_t cookie,
334 return DMA_IN_PROGRESS; 407 return DMA_IN_PROGRESS;
335} 408}
336 409
410enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
337 411
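Putting the completion helpers together, a hedged polling sketch of the kind a memcpy client might use (chan, dest, src, and len are assumed to exist; dma_sync_wait() is the blocking shorthand for the same loop):

    dma_cookie_t cookie, last, used;
    enum dma_status status;

    cookie = dma_async_memcpy_buf_to_buf(chan, dest, src, len);
    dma_async_issue_pending(chan);

    do {
            status = dma_async_is_tx_complete(chan, cookie, &last, &used);
            /* further cookies can be tested against the same snapshot
             * with dma_async_is_complete(cookie, last, used)
             */
    } while (status == DMA_IN_PROGRESS);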
338/* --- DMA device --- */ 412/* --- DMA device --- */
339 413
@@ -362,5 +436,4 @@ dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
362 struct dma_pinned_list *pinned_list, struct page *page, 436 struct dma_pinned_list *pinned_list, struct page *page,
363 unsigned int offset, size_t len); 437 unsigned int offset, size_t len);
364 438
365#endif /* CONFIG_DMA_ENGINE */
366#endif /* DMAENGINE_H */ 439#endif /* DMAENGINE_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 9366182fffa7..2c7add169539 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -479,6 +479,9 @@
479#define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 479#define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361
480#define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 480#define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252
481 481
482#define PCI_VENDOR_ID_UNISYS 0x1018
483#define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C
484
482#define PCI_VENDOR_ID_COMPEX2 0x101a /* pci.ids says "AT&T GIS (NCR)" */ 485#define PCI_VENDOR_ID_COMPEX2 0x101a /* pci.ids says "AT&T GIS (NCR)" */
483#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005 486#define PCI_DEVICE_ID_COMPEX2_100VG 0x0005
484 487
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index d8286db60b96..93678f57ccbe 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -116,13 +116,46 @@
116 * attach a request to an active stripe (add_stripe_bh()) 116 * attach a request to an active stripe (add_stripe_bh())
117 * lockdev attach-buffer unlockdev 117 * lockdev attach-buffer unlockdev
118 * handle a stripe (handle_stripe()) 118 * handle a stripe (handle_stripe())
119 * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io 119 * lockstripe clrSTRIPE_HANDLE ...
120 * (lockdev check-buffers unlockdev) ..
121 * change-state ..
122 * record io/ops needed unlockstripe schedule io/ops
120 * release an active stripe (release_stripe()) 123 * release an active stripe (release_stripe())
121 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev 124 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
122 * 125 *
123 * The refcount counts each thread that has activated the stripe, 126 * The refcount counts each thread that has activated the stripe,
124 * plus raid5d if it is handling it, plus one for each active request 127 * plus raid5d if it is handling it, plus one for each active request
125 * on a cached buffer. 128 * on a cached buffer, and plus one if the stripe is undergoing stripe
129 * operations.
130 *
131 * Stripe operations are performed outside the stripe lock,
132 * the stripe operations are:
133 * -copying data between the stripe cache and user application buffers
134 * -computing blocks to save a disk access, or to recover a missing block
135 * -updating the parity on a write operation (reconstruct write and
136 * read-modify-write)
137 * -checking parity correctness
138 * -running i/o to disk
139 * These operations are carried out by raid5_run_ops which uses the async_tx
140 * api to (optionally) offload operations to dedicated hardware engines.
141 * When requesting an operation handle_stripe sets the pending bit for the
142 * operation and increments the count. raid5_run_ops is then run whenever
143 * the count is non-zero.
144 * There are some critical dependencies between the operations that prevent some
145 * from being requested while another is in flight.
146 * 1/ Parity check operations destroy the in cache version of the parity block,
147 * so we prevent parity dependent operations like writes and compute_blocks
148 * from starting while a check is in progress. Some dma engines can perform
149 * the check without damaging the parity block, in these cases the parity
150 * block is re-marked up to date (assuming the check was successful) and is
151 * not re-read from disk.
152 * 2/ When a write operation is requested we immediately lock the affected
153 * blocks, and mark them as not up to date. This causes new read requests
154 * to be held off, as well as parity checks and compute block operations.
155 * 3/ Once a compute block operation has been requested handle_stripe treats
156 * that block as if it is up to date. raid5_run_ops guarantees that any
157 * operation that is dependent on the compute block result is initiated after
158 * the compute block completes.
126 */ 159 */
127 160
128struct stripe_head { 161struct stripe_head {
@@ -136,15 +169,46 @@ struct stripe_head {
136 spinlock_t lock; 169 spinlock_t lock;
137 int bm_seq; /* sequence number for bitmap flushes */ 170 int bm_seq; /* sequence number for bitmap flushes */
138 int disks; /* disks in stripe */ 171 int disks; /* disks in stripe */
172 /* stripe_operations
173 * @pending - pending ops flags (set for request->issue->complete)
174 * @ack - submitted ops flags (set for issue->complete)
175 * @complete - completed ops flags (set for complete)
176 * @target - STRIPE_OP_COMPUTE_BLK target
177 * @count - raid5_run_ops is set to run when this is non-zero
178 */
179 struct stripe_operations {
180 unsigned long pending;
181 unsigned long ack;
182 unsigned long complete;
183 int target;
184 int count;
185 u32 zero_sum_result;
186 } ops;
139 struct r5dev { 187 struct r5dev {
140 struct bio req; 188 struct bio req;
141 struct bio_vec vec; 189 struct bio_vec vec;
142 struct page *page; 190 struct page *page;
143 struct bio *toread, *towrite, *written; 191 struct bio *toread, *read, *towrite, *written;
144 sector_t sector; /* sector of this page */ 192 sector_t sector; /* sector of this page */
145 unsigned long flags; 193 unsigned long flags;
146 } dev[1]; /* allocated with extra space depending on RAID geometry */ 194 } dev[1]; /* allocated with extra space depending on RAID geometry */
147}; 195};
196
197/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
198 * for handle_stripe. It is only valid under spin_lock(sh->lock);
199 */
200struct stripe_head_state {
201 int syncing, expanding, expanded;
202 int locked, uptodate, to_read, to_write, failed, written;
203 int to_fill, compute, req_compute, non_overwrite;
204 int failed_num;
205};
206
207/* r6_state - extra state data only relevant to r6 */
208struct r6_state {
209 int p_failed, q_failed, qd_idx, failed_num[2];
210};
211
148/* Flags */ 212/* Flags */
149#define R5_UPTODATE 0 /* page contains current data */ 213#define R5_UPTODATE 0 /* page contains current data */
150#define R5_LOCKED 1 /* IO has been submitted on "req" */ 214#define R5_LOCKED 1 /* IO has been submitted on "req" */
@@ -158,6 +222,15 @@ struct stripe_head {
158#define R5_ReWrite 9 /* have tried to over-write the readerror */ 222#define R5_ReWrite 9 /* have tried to over-write the readerror */
159 223
160#define R5_Expanded 10 /* This block now has post-expand data */ 224#define R5_Expanded 10 /* This block now has post-expand data */
225#define R5_Wantcompute 11 /* compute_block in progress treat as
226 * uptodate
227 */
228#define R5_Wantfill 12 /* dev->toread contains a bio that needs
229 * filling
230 */
231#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from
232 * other "towrites"
233 */
161/* 234/*
162 * Write method 235 * Write method
163 */ 236 */
@@ -180,6 +253,24 @@ struct stripe_head {
180#define STRIPE_EXPAND_SOURCE 10 253#define STRIPE_EXPAND_SOURCE 10
181#define STRIPE_EXPAND_READY 11 254#define STRIPE_EXPAND_READY 11
182/* 255/*
256 * Operations flags (in issue order)
257 */
258#define STRIPE_OP_BIOFILL 0
259#define STRIPE_OP_COMPUTE_BLK 1
260#define STRIPE_OP_PREXOR 2
261#define STRIPE_OP_BIODRAIN 3
262#define STRIPE_OP_POSTXOR 4
263#define STRIPE_OP_CHECK 5
264#define STRIPE_OP_IO 6
265
266/* modifiers to the base operations
267 * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
268 * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
269 */
270#define STRIPE_OP_MOD_REPAIR_PD 7
271#define STRIPE_OP_MOD_DMA_CHECK 8
272
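A hedged sketch of the request/issue/complete bookkeeping these flags drive; dd_idx is a placeholder for the block to rebuild, the locking rules are only noted in comments, and the real decision logic lives in handle_stripe and raid5_run_ops:

    /* request (handle_stripe, under sh->lock) */
    if (!test_and_set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
            sh->ops.target = dd_idx;
            sh->ops.count++;        /* raid5_run_ops will be scheduled */
    }

    /* issue (raid5_run_ops, outside sh->lock) */
    if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
        !test_and_set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack)) {
            /* build the async_xor/async_memcpy chain for the target */
    }

    /* complete (async_tx callback) */
    set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);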
273/*
183 * Plugging: 274 * Plugging:
184 * 275 *
185 * To improve write throughput, we need to delay the handling of some 276 * To improve write throughput, we need to delay the handling of some
diff --git a/include/linux/raid/xor.h b/include/linux/raid/xor.h
index f0d67cbdea40..3e120587eada 100644
--- a/include/linux/raid/xor.h
+++ b/include/linux/raid/xor.h
@@ -3,9 +3,10 @@
3 3
4#include <linux/raid/md.h> 4#include <linux/raid/md.h>
5 5
6#define MAX_XOR_BLOCKS 5 6#define MAX_XOR_BLOCKS 4
7 7
8extern void xor_block(unsigned int count, unsigned int bytes, void **ptr); 8extern void xor_blocks(unsigned int count, unsigned int bytes,
9 void *dest, void **srcs);
9 10
10struct xor_block_template { 11struct xor_block_template {
11 struct xor_block_template *next; 12 struct xor_block_template *next;
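For reference, a minimal hedged call of the renamed routine with its now-explicit destination; the buffers are assumed to be kernel-virtual and count no larger than MAX_XOR_BLOCKS:

    void *srcs[MAX_XOR_BLOCKS];

    /* dest ^= srcs[0] ^ srcs[1] ^ ... ^ srcs[count - 1] */
    xor_blocks(count, PAGE_SIZE, page_address(dest_page), srcs);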
diff --git a/net/core/dev.c b/net/core/dev.c
index 4221dcda88d7..96443055324e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -151,9 +151,22 @@ static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
151static struct list_head ptype_all __read_mostly; /* Taps */ 151static struct list_head ptype_all __read_mostly; /* Taps */
152 152
153#ifdef CONFIG_NET_DMA 153#ifdef CONFIG_NET_DMA
154static struct dma_client *net_dma_client; 154struct net_dma {
155static unsigned int net_dma_count; 155 struct dma_client client;
156static spinlock_t net_dma_event_lock; 156 spinlock_t lock;
157 cpumask_t channel_mask;
158 struct dma_chan *channels[NR_CPUS];
159};
160
161static enum dma_state_client
162netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
163 enum dma_state state);
164
165static struct net_dma net_dma = {
166 .client = {
167 .event_callback = netdev_dma_event,
168 },
169};
157#endif 170#endif
158 171
159/* 172/*
@@ -2022,12 +2035,13 @@ out:
2022 * There may not be any more sk_buffs coming right now, so push 2035 * There may not be any more sk_buffs coming right now, so push
2023 * any pending DMA copies to hardware 2036 * any pending DMA copies to hardware
2024 */ 2037 */
2025 if (net_dma_client) { 2038 if (!cpus_empty(net_dma.channel_mask)) {
2026 struct dma_chan *chan; 2039 int chan_idx;
2027 rcu_read_lock(); 2040 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2028 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) 2041 struct dma_chan *chan = net_dma.channels[chan_idx];
2029 dma_async_memcpy_issue_pending(chan); 2042 if (chan)
2030 rcu_read_unlock(); 2043 dma_async_memcpy_issue_pending(chan);
2044 }
2031 } 2045 }
2032#endif 2046#endif
2033 return; 2047 return;
@@ -3775,12 +3789,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
3775 * This is called when the number of channels allocated to the net_dma_client 3789 * This is called when the number of channels allocated to the net_dma_client
3776 * changes. The net_dma_client tries to have one DMA channel per CPU. 3790 * changes. The net_dma_client tries to have one DMA channel per CPU.
3777 */ 3791 */
3778static void net_dma_rebalance(void) 3792
3793static void net_dma_rebalance(struct net_dma *net_dma)
3779{ 3794{
3780 unsigned int cpu, i, n; 3795 unsigned int cpu, i, n, chan_idx;
3781 struct dma_chan *chan; 3796 struct dma_chan *chan;
3782 3797
3783 if (net_dma_count == 0) { 3798 if (cpus_empty(net_dma->channel_mask)) {
3784 for_each_online_cpu(cpu) 3799 for_each_online_cpu(cpu)
3785 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); 3800 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3786 return; 3801 return;
@@ -3789,10 +3804,12 @@ static void net_dma_rebalance(void)
3789 i = 0; 3804 i = 0;
3790 cpu = first_cpu(cpu_online_map); 3805 cpu = first_cpu(cpu_online_map);
3791 3806
3792 rcu_read_lock(); 3807 for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
3793 list_for_each_entry(chan, &net_dma_client->channels, client_node) { 3808 chan = net_dma->channels[chan_idx];
3794 n = ((num_online_cpus() / net_dma_count) 3809
3795 + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); 3810 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
3811 + (i < (num_online_cpus() %
3812 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
3796 3813
3797 while(n) { 3814 while(n) {
3798 per_cpu(softnet_data, cpu).net_dma = chan; 3815 per_cpu(softnet_data, cpu).net_dma = chan;
@@ -3801,7 +3818,6 @@ static void net_dma_rebalance(void)
3801 } 3818 }
3802 i++; 3819 i++;
3803 } 3820 }
3804 rcu_read_unlock();
3805} 3821}
3806 3822
3807/** 3823/**
@@ -3810,23 +3826,53 @@ static void net_dma_rebalance(void)
3810 * @chan: DMA channel for the event 3826 * @chan: DMA channel for the event
3811 * @event: event type 3827 * @event: event type
3812 */ 3828 */
3813static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 3829static enum dma_state_client
3814 enum dma_event event) 3830netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3815{ 3831 enum dma_state state)
3816 spin_lock(&net_dma_event_lock); 3832{
3817 switch (event) { 3833 int i, found = 0, pos = -1;
3818 case DMA_RESOURCE_ADDED: 3834 struct net_dma *net_dma =
3819 net_dma_count++; 3835 container_of(client, struct net_dma, client);
3820 net_dma_rebalance(); 3836 enum dma_state_client ack = DMA_DUP; /* default: take no action */
3837
3838 spin_lock(&net_dma->lock);
3839 switch (state) {
3840 case DMA_RESOURCE_AVAILABLE:
3841 for (i = 0; i < NR_CPUS; i++)
3842 if (net_dma->channels[i] == chan) {
3843 found = 1;
3844 break;
3845 } else if (net_dma->channels[i] == NULL && pos < 0)
3846 pos = i;
3847
3848 if (!found && pos >= 0) {
3849 ack = DMA_ACK;
3850 net_dma->channels[pos] = chan;
3851 cpu_set(pos, net_dma->channel_mask);
3852 net_dma_rebalance(net_dma);
3853 }
3821 break; 3854 break;
3822 case DMA_RESOURCE_REMOVED: 3855 case DMA_RESOURCE_REMOVED:
3823 net_dma_count--; 3856 for (i = 0; i < NR_CPUS; i++)
3824 net_dma_rebalance(); 3857 if (net_dma->channels[i] == chan) {
3858 found = 1;
3859 pos = i;
3860 break;
3861 }
3862
3863 if (found) {
3864 ack = DMA_ACK;
3865 cpu_clear(pos, net_dma->channel_mask);
3866 net_dma->channels[i] = NULL;
3867 net_dma_rebalance(net_dma);
3868 }
3825 break; 3869 break;
3826 default: 3870 default:
3827 break; 3871 break;
3828 } 3872 }
3829 spin_unlock(&net_dma_event_lock); 3873 spin_unlock(&net_dma->lock);
3874
3875 return ack;
3830} 3876}
3831 3877
3832/** 3878/**
@@ -3834,12 +3880,10 @@ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3834 */ 3880 */
3835static int __init netdev_dma_register(void) 3881static int __init netdev_dma_register(void)
3836{ 3882{
3837 spin_lock_init(&net_dma_event_lock); 3883 spin_lock_init(&net_dma.lock);
3838 net_dma_client = dma_async_client_register(netdev_dma_event); 3884 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
3839 if (net_dma_client == NULL) 3885 dma_async_client_register(&net_dma.client);
3840 return -ENOMEM; 3886 dma_async_client_chan_request(&net_dma.client);
3841
3842 dma_async_client_chan_request(net_dma_client, num_online_cpus());
3843 return 0; 3887 return 0;
3844} 3888}
3845 3889
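The net_dma conversion above doubles as the template for any client of the reworked channel-allocation model: declare a struct dma_client with a capability mask and an event callback, register it, then request channels. A stripped-down, hedged version (names invented) looks like:

    static enum dma_state_client
    my_dma_event(struct dma_client *client, struct dma_chan *chan,
                 enum dma_state state)
    {
            /* DMA_ACK keeps the channel; DMA_DUP declines it */
            return state == DMA_RESOURCE_AVAILABLE ? DMA_ACK : DMA_DUP;
    }

    static struct dma_client my_client = {
            .event_callback = my_dma_event,
    };

    static int __init my_client_init(void)
    {
            dma_cap_set(DMA_MEMCPY, my_client.cap_mask);
            dma_async_client_register(&my_client);
            dma_async_client_chan_request(&my_client);
            return 0;
    }

A client that holds on to channels must also ack DMA_RESOURCE_REMOVED for the channels it is using, as netdev_dma_event does above.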
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 450f44bb2c8e..987b94403be5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1116,6 +1116,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1116 long timeo; 1116 long timeo;
1117 struct task_struct *user_recv = NULL; 1117 struct task_struct *user_recv = NULL;
1118 int copied_early = 0; 1118 int copied_early = 0;
1119 struct sk_buff *skb;
1119 1120
1120 lock_sock(sk); 1121 lock_sock(sk);
1121 1122
@@ -1142,16 +1143,26 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1142#ifdef CONFIG_NET_DMA 1143#ifdef CONFIG_NET_DMA
1143 tp->ucopy.dma_chan = NULL; 1144 tp->ucopy.dma_chan = NULL;
1144 preempt_disable(); 1145 preempt_disable();
1145 if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && 1146 skb = skb_peek_tail(&sk->sk_receive_queue);
1146 !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { 1147 {
1147 preempt_enable_no_resched(); 1148 int available = 0;
1148 tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); 1149
1149 } else 1150 if (skb)
1150 preempt_enable_no_resched(); 1151 available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1152 if ((available < target) &&
1153 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1154 !sysctl_tcp_low_latency &&
1155 __get_cpu_var(softnet_data).net_dma) {
1156 preempt_enable_no_resched();
1157 tp->ucopy.pinned_list =
1158 dma_pin_iovec_pages(msg->msg_iov, len);
1159 } else {
1160 preempt_enable_no_resched();
1161 }
1162 }
1151#endif 1163#endif
1152 1164
1153 do { 1165 do {
1154 struct sk_buff *skb;
1155 u32 offset; 1166 u32 offset;
1156 1167
1157 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ 1168 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
@@ -1439,7 +1450,6 @@ skip_copy:
1439 1450
1440#ifdef CONFIG_NET_DMA 1451#ifdef CONFIG_NET_DMA
1441 if (tp->ucopy.dma_chan) { 1452 if (tp->ucopy.dma_chan) {
1442 struct sk_buff *skb;
1443 dma_cookie_t done, used; 1453 dma_cookie_t done, used;
1444 1454
1445 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan); 1455 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);