aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/scsi/ahci.c30
-rw-r--r--drivers/scsi/libata-core.c123
-rw-r--r--drivers/scsi/libata-scsi.c14
-rw-r--r--drivers/scsi/pdc_adma.c8
-rw-r--r--drivers/scsi/sata_mv.c17
-rw-r--r--drivers/scsi/sata_qstor.c8
-rw-r--r--drivers/scsi/sata_sil24.c15
-rw-r--r--drivers/scsi/sata_sx4.c13
-rw-r--r--include/linux/libata.h39
9 files changed, 209 insertions, 58 deletions
diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index fe8187d6f58b..cbab5868ffdf 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -314,8 +314,15 @@ static int ahci_port_start(struct ata_port *ap)
314 return -ENOMEM; 314 return -ENOMEM;
315 memset(pp, 0, sizeof(*pp)); 315 memset(pp, 0, sizeof(*pp));
316 316
317 ap->pad = dma_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ, &ap->pad_dma, GFP_KERNEL);
318 if (!ap->pad) {
319 kfree(pp);
320 return -ENOMEM;
321 }
322
317 mem = dma_alloc_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, &mem_dma, GFP_KERNEL); 323 mem = dma_alloc_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, &mem_dma, GFP_KERNEL);
318 if (!mem) { 324 if (!mem) {
325 dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
319 kfree(pp); 326 kfree(pp);
320 return -ENOMEM; 327 return -ENOMEM;
321 } 328 }
@@ -391,6 +398,7 @@ static void ahci_port_stop(struct ata_port *ap)
391 ap->private_data = NULL; 398 ap->private_data = NULL;
392 dma_free_coherent(dev, AHCI_PORT_PRIV_DMA_SZ, 399 dma_free_coherent(dev, AHCI_PORT_PRIV_DMA_SZ,
393 pp->cmd_slot, pp->cmd_slot_dma); 400 pp->cmd_slot, pp->cmd_slot_dma);
401 dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
394 kfree(pp); 402 kfree(pp);
395} 403}
396 404
@@ -476,23 +484,23 @@ static void ahci_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
476static void ahci_fill_sg(struct ata_queued_cmd *qc) 484static void ahci_fill_sg(struct ata_queued_cmd *qc)
477{ 485{
478 struct ahci_port_priv *pp = qc->ap->private_data; 486 struct ahci_port_priv *pp = qc->ap->private_data;
479 unsigned int i; 487 struct scatterlist *sg;
488 struct ahci_sg *ahci_sg;
480 489
481 VPRINTK("ENTER\n"); 490 VPRINTK("ENTER\n");
482 491
483 /* 492 /*
484 * Next, the S/G list. 493 * Next, the S/G list.
485 */ 494 */
486 for (i = 0; i < qc->n_elem; i++) { 495 ahci_sg = pp->cmd_tbl_sg;
487 u32 sg_len; 496 ata_for_each_sg(sg, qc) {
488 dma_addr_t addr; 497 dma_addr_t addr = sg_dma_address(sg);
489 498 u32 sg_len = sg_dma_len(sg);
490 addr = sg_dma_address(&qc->sg[i]); 499
491 sg_len = sg_dma_len(&qc->sg[i]); 500 ahci_sg->addr = cpu_to_le32(addr & 0xffffffff);
492 501 ahci_sg->addr_hi = cpu_to_le32((addr >> 16) >> 16);
493 pp->cmd_tbl_sg[i].addr = cpu_to_le32(addr & 0xffffffff); 502 ahci_sg->flags_size = cpu_to_le32(sg_len - 1);
494 pp->cmd_tbl_sg[i].addr_hi = cpu_to_le32((addr >> 16) >> 16); 503 ahci_sg++;
495 pp->cmd_tbl_sg[i].flags_size = cpu_to_le32(sg_len - 1);
496 } 504 }
497} 505}
498 506
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index f53d7b8ac33f..64f30bf59315 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -2444,8 +2444,9 @@ static void ata_dev_init_params(struct ata_port *ap, struct ata_device *dev)
2444static void ata_sg_clean(struct ata_queued_cmd *qc) 2444static void ata_sg_clean(struct ata_queued_cmd *qc)
2445{ 2445{
2446 struct ata_port *ap = qc->ap; 2446 struct ata_port *ap = qc->ap;
2447 struct scatterlist *sg = qc->sg; 2447 struct scatterlist *sg = qc->__sg;
2448 int dir = qc->dma_dir; 2448 int dir = qc->dma_dir;
2449 void *pad_buf = NULL;
2449 2450
2450 assert(qc->flags & ATA_QCFLAG_DMAMAP); 2451 assert(qc->flags & ATA_QCFLAG_DMAMAP);
2451 assert(sg != NULL); 2452 assert(sg != NULL);
@@ -2455,14 +2456,35 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
2455 2456
2456 DPRINTK("unmapping %u sg elements\n", qc->n_elem); 2457 DPRINTK("unmapping %u sg elements\n", qc->n_elem);
2457 2458
2458 if (qc->flags & ATA_QCFLAG_SG) 2459 /* if we padded the buffer out to 32-bit bound, and data
2460 * xfer direction is from-device, we must copy from the
2461 * pad buffer back into the supplied buffer
2462 */
2463 if (qc->pad_len && !(qc->tf.flags & ATA_TFLAG_WRITE))
2464 pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
2465
2466 if (qc->flags & ATA_QCFLAG_SG) {
2459 dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir); 2467 dma_unmap_sg(ap->host_set->dev, sg, qc->n_elem, dir);
2460 else 2468 /* restore last sg */
2469 sg[qc->orig_n_elem - 1].length += qc->pad_len;
2470 if (pad_buf) {
2471 struct scatterlist *psg = &qc->pad_sgent;
2472 void *addr = kmap_atomic(psg->page, KM_IRQ0);
2473 memcpy(addr + psg->offset, pad_buf, qc->pad_len);
2474 kunmap_atomic(psg->page, KM_IRQ0);
2475 }
2476 } else {
2461 dma_unmap_single(ap->host_set->dev, sg_dma_address(&sg[0]), 2477 dma_unmap_single(ap->host_set->dev, sg_dma_address(&sg[0]),
2462 sg_dma_len(&sg[0]), dir); 2478 sg_dma_len(&sg[0]), dir);
2479 /* restore sg */
2480 sg->length += qc->pad_len;
2481 if (pad_buf)
2482 memcpy(qc->buf_virt + sg->length - qc->pad_len,
2483 pad_buf, qc->pad_len);
2484 }
2463 2485
2464 qc->flags &= ~ATA_QCFLAG_DMAMAP; 2486 qc->flags &= ~ATA_QCFLAG_DMAMAP;
2465 qc->sg = NULL; 2487 qc->__sg = NULL;
2466} 2488}
2467 2489
2468/** 2490/**
@@ -2478,15 +2500,15 @@ static void ata_sg_clean(struct ata_queued_cmd *qc)
2478 */ 2500 */
2479static void ata_fill_sg(struct ata_queued_cmd *qc) 2501static void ata_fill_sg(struct ata_queued_cmd *qc)
2480{ 2502{
2481 struct scatterlist *sg = qc->sg;
2482 struct ata_port *ap = qc->ap; 2503 struct ata_port *ap = qc->ap;
2483 unsigned int idx, nelem; 2504 struct scatterlist *sg;
2505 unsigned int idx;
2484 2506
2485 assert(sg != NULL); 2507 assert(qc->__sg != NULL);
2486 assert(qc->n_elem > 0); 2508 assert(qc->n_elem > 0);
2487 2509
2488 idx = 0; 2510 idx = 0;
2489 for (nelem = qc->n_elem; nelem; nelem--,sg++) { 2511 ata_for_each_sg(sg, qc) {
2490 u32 addr, offset; 2512 u32 addr, offset;
2491 u32 sg_len, len; 2513 u32 sg_len, len;
2492 2514
@@ -2577,11 +2599,12 @@ void ata_sg_init_one(struct ata_queued_cmd *qc, void *buf, unsigned int buflen)
2577 qc->flags |= ATA_QCFLAG_SINGLE; 2599 qc->flags |= ATA_QCFLAG_SINGLE;
2578 2600
2579 memset(&qc->sgent, 0, sizeof(qc->sgent)); 2601 memset(&qc->sgent, 0, sizeof(qc->sgent));
2580 qc->sg = &qc->sgent; 2602 qc->__sg = &qc->sgent;
2581 qc->n_elem = 1; 2603 qc->n_elem = 1;
2604 qc->orig_n_elem = 1;
2582 qc->buf_virt = buf; 2605 qc->buf_virt = buf;
2583 2606
2584 sg = qc->sg; 2607 sg = qc->__sg;
2585 sg->page = virt_to_page(buf); 2608 sg->page = virt_to_page(buf);
2586 sg->offset = (unsigned long) buf & ~PAGE_MASK; 2609 sg->offset = (unsigned long) buf & ~PAGE_MASK;
2587 sg->length = buflen; 2610 sg->length = buflen;
@@ -2605,8 +2628,9 @@ void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
2605 unsigned int n_elem) 2628 unsigned int n_elem)
2606{ 2629{
2607 qc->flags |= ATA_QCFLAG_SG; 2630 qc->flags |= ATA_QCFLAG_SG;
2608 qc->sg = sg; 2631 qc->__sg = sg;
2609 qc->n_elem = n_elem; 2632 qc->n_elem = n_elem;
2633 qc->orig_n_elem = n_elem;
2610} 2634}
2611 2635
2612/** 2636/**
@@ -2626,9 +2650,32 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc)
2626{ 2650{
2627 struct ata_port *ap = qc->ap; 2651 struct ata_port *ap = qc->ap;
2628 int dir = qc->dma_dir; 2652 int dir = qc->dma_dir;
2629 struct scatterlist *sg = qc->sg; 2653 struct scatterlist *sg = qc->__sg;
2630 dma_addr_t dma_address; 2654 dma_addr_t dma_address;
2631 2655
2656 /* we must lengthen transfers to end on a 32-bit boundary */
2657 qc->pad_len = sg->length & 3;
2658 if (qc->pad_len) {
2659 void *pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
2660 struct scatterlist *psg = &qc->pad_sgent;
2661
2662 assert(qc->dev->class == ATA_DEV_ATAPI);
2663
2664 memset(pad_buf, 0, ATA_DMA_PAD_SZ);
2665
2666 if (qc->tf.flags & ATA_TFLAG_WRITE)
2667 memcpy(pad_buf, qc->buf_virt + sg->length - qc->pad_len,
2668 qc->pad_len);
2669
2670 sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
2671 sg_dma_len(psg) = ATA_DMA_PAD_SZ;
2672 /* trim sg */
2673 sg->length -= qc->pad_len;
2674
2675 DPRINTK("padding done, sg->length=%u pad_len=%u\n",
2676 sg->length, qc->pad_len);
2677 }
2678
2632 dma_address = dma_map_single(ap->host_set->dev, qc->buf_virt, 2679 dma_address = dma_map_single(ap->host_set->dev, qc->buf_virt,
2633 sg->length, dir); 2680 sg->length, dir);
2634 if (dma_mapping_error(dma_address)) 2681 if (dma_mapping_error(dma_address))
@@ -2660,12 +2707,47 @@ static int ata_sg_setup_one(struct ata_queued_cmd *qc)
2660static int ata_sg_setup(struct ata_queued_cmd *qc) 2707static int ata_sg_setup(struct ata_queued_cmd *qc)
2661{ 2708{
2662 struct ata_port *ap = qc->ap; 2709 struct ata_port *ap = qc->ap;
2663 struct scatterlist *sg = qc->sg; 2710 struct scatterlist *sg = qc->__sg;
2711 struct scatterlist *lsg = &sg[qc->n_elem - 1];
2664 int n_elem, dir; 2712 int n_elem, dir;
2665 2713
2666 VPRINTK("ENTER, ata%u\n", ap->id); 2714 VPRINTK("ENTER, ata%u\n", ap->id);
2667 assert(qc->flags & ATA_QCFLAG_SG); 2715 assert(qc->flags & ATA_QCFLAG_SG);
2668 2716
2717 /* we must lengthen transfers to end on a 32-bit boundary */
2718 qc->pad_len = lsg->length & 3;
2719 if (qc->pad_len) {
2720 void *pad_buf = ap->pad + (qc->tag * ATA_DMA_PAD_SZ);
2721 struct scatterlist *psg = &qc->pad_sgent;
2722 unsigned int offset;
2723
2724 assert(qc->dev->class == ATA_DEV_ATAPI);
2725
2726 memset(pad_buf, 0, ATA_DMA_PAD_SZ);
2727
2728 /*
2729 * psg->page/offset are used to copy to-be-written
2730 * data in this function or read data in ata_sg_clean.
2731 */
2732 offset = lsg->offset + lsg->length - qc->pad_len;
2733 psg->page = nth_page(lsg->page, offset >> PAGE_SHIFT);
2734 psg->offset = offset_in_page(offset);
2735
2736 if (qc->tf.flags & ATA_TFLAG_WRITE) {
2737 void *addr = kmap_atomic(psg->page, KM_IRQ0);
2738 memcpy(pad_buf, addr + psg->offset, qc->pad_len);
2739 kunmap_atomic(psg->page, KM_IRQ0);
2740 }
2741
2742 sg_dma_address(psg) = ap->pad_dma + (qc->tag * ATA_DMA_PAD_SZ);
2743 sg_dma_len(psg) = ATA_DMA_PAD_SZ;
2744 /* trim last sg */
2745 lsg->length -= qc->pad_len;
2746
2747 DPRINTK("padding done, sg[%d].length=%u pad_len=%u\n",
2748 qc->n_elem - 1, lsg->length, qc->pad_len);
2749 }
2750
2669 dir = qc->dma_dir; 2751 dir = qc->dma_dir;
2670 n_elem = dma_map_sg(ap->host_set->dev, sg, qc->n_elem, dir); 2752 n_elem = dma_map_sg(ap->host_set->dev, sg, qc->n_elem, dir);
2671 if (n_elem < 1) 2753 if (n_elem < 1)
@@ -2941,7 +3023,7 @@ static void ata_data_xfer(struct ata_port *ap, unsigned char *buf,
2941static void ata_pio_sector(struct ata_queued_cmd *qc) 3023static void ata_pio_sector(struct ata_queued_cmd *qc)
2942{ 3024{
2943 int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); 3025 int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
2944 struct scatterlist *sg = qc->sg; 3026 struct scatterlist *sg = qc->__sg;
2945 struct ata_port *ap = qc->ap; 3027 struct ata_port *ap = qc->ap;
2946 struct page *page; 3028 struct page *page;
2947 unsigned int offset; 3029 unsigned int offset;
@@ -2991,7 +3073,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc)
2991static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes) 3073static void __atapi_pio_bytes(struct ata_queued_cmd *qc, unsigned int bytes)
2992{ 3074{
2993 int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); 3075 int do_write = (qc->tf.flags & ATA_TFLAG_WRITE);
2994 struct scatterlist *sg = qc->sg; 3076 struct scatterlist *sg = qc->__sg;
2995 struct ata_port *ap = qc->ap; 3077 struct ata_port *ap = qc->ap;
2996 struct page *page; 3078 struct page *page;
2997 unsigned char *buf; 3079 unsigned char *buf;
@@ -3024,7 +3106,7 @@ next_sg:
3024 return; 3106 return;
3025 } 3107 }
3026 3108
3027 sg = &qc->sg[qc->cursg]; 3109 sg = &qc->__sg[qc->cursg];
3028 3110
3029 page = sg->page; 3111 page = sg->page;
3030 offset = sg->offset + qc->cursg_ofs; 3112 offset = sg->offset + qc->cursg_ofs;
@@ -3384,7 +3466,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
3384 3466
3385 qc = ata_qc_new(ap); 3467 qc = ata_qc_new(ap);
3386 if (qc) { 3468 if (qc) {
3387 qc->sg = NULL; 3469 qc->__sg = NULL;
3388 qc->flags = 0; 3470 qc->flags = 0;
3389 qc->scsicmd = NULL; 3471 qc->scsicmd = NULL;
3390 qc->ap = ap; 3472 qc->ap = ap;
@@ -4071,6 +4153,12 @@ int ata_port_start (struct ata_port *ap)
4071 if (!ap->prd) 4153 if (!ap->prd)
4072 return -ENOMEM; 4154 return -ENOMEM;
4073 4155
4156 ap->pad = dma_alloc_coherent(dev, ATA_DMA_PAD_BUF_SZ, &ap->pad_dma, GFP_KERNEL);
4157 if (!ap->pad) {
4158 dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
4159 return -ENOMEM;
4160 }
4161
4074 DPRINTK("prd alloc, virt %p, dma %llx\n", ap->prd, (unsigned long long) ap->prd_dma); 4162 DPRINTK("prd alloc, virt %p, dma %llx\n", ap->prd, (unsigned long long) ap->prd_dma);
4075 4163
4076 return 0; 4164 return 0;
@@ -4094,6 +4182,7 @@ void ata_port_stop (struct ata_port *ap)
4094 struct device *dev = ap->host_set->dev; 4182 struct device *dev = ap->host_set->dev;
4095 4183
4096 dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma); 4184 dma_free_coherent(dev, ATA_PRD_TBL_SZ, ap->prd, ap->prd_dma);
4185 dma_free_coherent(dev, ATA_DMA_PAD_BUF_SZ, ap->pad, ap->pad_dma);
4097} 4186}
4098 4187
4099void ata_host_stop (struct ata_host_set *host_set) 4188void ata_host_stop (struct ata_host_set *host_set)
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 89a04b1a5a0e..69058510f43a 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -354,10 +354,10 @@ struct ata_queued_cmd *ata_scsi_qc_new(struct ata_port *ap,
354 qc->scsidone = done; 354 qc->scsidone = done;
355 355
356 if (cmd->use_sg) { 356 if (cmd->use_sg) {
357 qc->sg = (struct scatterlist *) cmd->request_buffer; 357 qc->__sg = (struct scatterlist *) cmd->request_buffer;
358 qc->n_elem = cmd->use_sg; 358 qc->n_elem = cmd->use_sg;
359 } else { 359 } else {
360 qc->sg = &qc->sgent; 360 qc->__sg = &qc->sgent;
361 qc->n_elem = 1; 361 qc->n_elem = 1;
362 } 362 }
363 } else { 363 } else {
@@ -693,6 +693,16 @@ int ata_scsi_slave_config(struct scsi_device *sdev)
693 */ 693 */
694 blk_queue_max_sectors(sdev->request_queue, 2048); 694 blk_queue_max_sectors(sdev->request_queue, 2048);
695 } 695 }
696
697 /*
698 * SATA DMA transfers must be multiples of 4 byte, so
699 * we need to pad ATAPI transfers using an extra sg.
700 * Decrement max hw segments accordingly.
701 */
702 if (dev->class == ATA_DEV_ATAPI) {
703 request_queue_t *q = sdev->request_queue;
704 blk_queue_max_hw_segments(q, q->max_hw_segments - 1);
705 }
696 } 706 }
697 707
698 return 0; /* scsi layer doesn't check return value, sigh */ 708 return 0; /* scsi layer doesn't check return value, sigh */
diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c
index 7999817915c3..eebb3eb20255 100644
--- a/drivers/scsi/pdc_adma.c
+++ b/drivers/scsi/pdc_adma.c
@@ -292,14 +292,14 @@ static void adma_eng_timeout(struct ata_port *ap)
292 292
293static int adma_fill_sg(struct ata_queued_cmd *qc) 293static int adma_fill_sg(struct ata_queued_cmd *qc)
294{ 294{
295 struct scatterlist *sg = qc->sg; 295 struct scatterlist *sg;
296 struct ata_port *ap = qc->ap; 296 struct ata_port *ap = qc->ap;
297 struct adma_port_priv *pp = ap->private_data; 297 struct adma_port_priv *pp = ap->private_data;
298 u8 *buf = pp->pkt; 298 u8 *buf = pp->pkt;
299 int nelem, i = (2 + buf[3]) * 8; 299 int i = (2 + buf[3]) * 8;
300 u8 pFLAGS = pORD | ((qc->tf.flags & ATA_TFLAG_WRITE) ? pDIRO : 0); 300 u8 pFLAGS = pORD | ((qc->tf.flags & ATA_TFLAG_WRITE) ? pDIRO : 0);
301 301
302 for (nelem = 0; nelem < qc->n_elem; nelem++,sg++) { 302 ata_for_each_sg(sg, qc) {
303 u32 addr; 303 u32 addr;
304 u32 len; 304 u32 len;
305 305
@@ -311,7 +311,7 @@ static int adma_fill_sg(struct ata_queued_cmd *qc)
311 *(__le32 *)(buf + i) = cpu_to_le32(len); 311 *(__le32 *)(buf + i) = cpu_to_le32(len);
312 i += 4; 312 i += 4;
313 313
314 if ((nelem + 1) == qc->n_elem) 314 if (ata_sg_is_last(sg, qc))
315 pFLAGS |= pEND; 315 pFLAGS |= pEND;
316 buf[i++] = pFLAGS; 316 buf[i++] = pFLAGS;
317 buf[i++] = qc->dev->dma_mode & 0xf; 317 buf[i++] = qc->dev->dma_mode & 0xf;
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index 422e0b6f603a..1d6d0c9e76f6 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -785,23 +785,24 @@ static void mv_port_stop(struct ata_port *ap)
785static void mv_fill_sg(struct ata_queued_cmd *qc) 785static void mv_fill_sg(struct ata_queued_cmd *qc)
786{ 786{
787 struct mv_port_priv *pp = qc->ap->private_data; 787 struct mv_port_priv *pp = qc->ap->private_data;
788 unsigned int i; 788 unsigned int i = 0;
789 struct scatterlist *sg;
789 790
790 for (i = 0; i < qc->n_elem; i++) { 791 ata_for_each_sg(sg, qc) {
791 u32 sg_len; 792 u32 sg_len;
792 dma_addr_t addr; 793 dma_addr_t addr;
793 794
794 addr = sg_dma_address(&qc->sg[i]); 795 addr = sg_dma_address(sg);
795 sg_len = sg_dma_len(&qc->sg[i]); 796 sg_len = sg_dma_len(sg);
796 797
797 pp->sg_tbl[i].addr = cpu_to_le32(addr & 0xffffffff); 798 pp->sg_tbl[i].addr = cpu_to_le32(addr & 0xffffffff);
798 pp->sg_tbl[i].addr_hi = cpu_to_le32((addr >> 16) >> 16); 799 pp->sg_tbl[i].addr_hi = cpu_to_le32((addr >> 16) >> 16);
799 assert(0 == (sg_len & ~MV_DMA_BOUNDARY)); 800 assert(0 == (sg_len & ~MV_DMA_BOUNDARY));
800 pp->sg_tbl[i].flags_size = cpu_to_le32(sg_len); 801 pp->sg_tbl[i].flags_size = cpu_to_le32(sg_len);
801 } 802 if (ata_sg_is_last(sg, qc))
802 if (0 < qc->n_elem) { 803 pp->sg_tbl[i].flags_size |= cpu_to_le32(EPRD_FLAG_END_OF_TBL);
803 pp->sg_tbl[qc->n_elem - 1].flags_size |= 804
804 cpu_to_le32(EPRD_FLAG_END_OF_TBL); 805 i++;
805 } 806 }
806} 807}
807 808
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c
index 250dafa6bc36..de3f266d67b3 100644
--- a/drivers/scsi/sata_qstor.c
+++ b/drivers/scsi/sata_qstor.c
@@ -270,16 +270,17 @@ static void qs_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val)
270 270
271static void qs_fill_sg(struct ata_queued_cmd *qc) 271static void qs_fill_sg(struct ata_queued_cmd *qc)
272{ 272{
273 struct scatterlist *sg = qc->sg; 273 struct scatterlist *sg;
274 struct ata_port *ap = qc->ap; 274 struct ata_port *ap = qc->ap;
275 struct qs_port_priv *pp = ap->private_data; 275 struct qs_port_priv *pp = ap->private_data;
276 unsigned int nelem; 276 unsigned int nelem;
277 u8 *prd = pp->pkt + QS_CPB_BYTES; 277 u8 *prd = pp->pkt + QS_CPB_BYTES;
278 278
279 assert(sg != NULL); 279 assert(qc->__sg != NULL);
280 assert(qc->n_elem > 0); 280 assert(qc->n_elem > 0);
281 281
282 for (nelem = 0; nelem < qc->n_elem; nelem++,sg++) { 282 nelem = 0;
283 ata_for_each_sg(sg, qc) {
283 u64 addr; 284 u64 addr;
284 u32 len; 285 u32 len;
285 286
@@ -293,6 +294,7 @@ static void qs_fill_sg(struct ata_queued_cmd *qc)
293 294
294 VPRINTK("PRD[%u] = (0x%llX, 0x%X)\n", nelem, 295 VPRINTK("PRD[%u] = (0x%llX, 0x%X)\n", nelem,
295 (unsigned long long)addr, len); 296 (unsigned long long)addr, len);
297 nelem++;
296 } 298 }
297} 299}
298 300
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index 32d730bd5bb6..e0d27a0fbad0 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -416,15 +416,20 @@ static void sil24_phy_reset(struct ata_port *ap)
416static inline void sil24_fill_sg(struct ata_queued_cmd *qc, 416static inline void sil24_fill_sg(struct ata_queued_cmd *qc,
417 struct sil24_cmd_block *cb) 417 struct sil24_cmd_block *cb)
418{ 418{
419 struct scatterlist *sg = qc->sg;
420 struct sil24_sge *sge = cb->sge; 419 struct sil24_sge *sge = cb->sge;
421 unsigned i; 420 struct scatterlist *sg;
421 unsigned int idx = 0;
422 422
423 for (i = 0; i < qc->n_elem; i++, sg++, sge++) { 423 ata_for_each_sg(sg, qc) {
424 sge->addr = cpu_to_le64(sg_dma_address(sg)); 424 sge->addr = cpu_to_le64(sg_dma_address(sg));
425 sge->cnt = cpu_to_le32(sg_dma_len(sg)); 425 sge->cnt = cpu_to_le32(sg_dma_len(sg));
426 sge->flags = 0; 426 if (ata_sg_is_last(sg, qc))
427 sge->flags = i < qc->n_elem - 1 ? 0 : cpu_to_le32(SGE_TRM); 427 sge->flags = cpu_to_le32(SGE_TRM);
428 else
429 sge->flags = 0;
430
431 sge++;
432 idx++;
428 } 433 }
429} 434}
430 435
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index af08f4f650c1..8710d0f14f93 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -449,14 +449,14 @@ static inline void pdc20621_host_pkt(struct ata_taskfile *tf, u8 *buf,
449 449
450static void pdc20621_dma_prep(struct ata_queued_cmd *qc) 450static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
451{ 451{
452 struct scatterlist *sg = qc->sg; 452 struct scatterlist *sg;
453 struct ata_port *ap = qc->ap; 453 struct ata_port *ap = qc->ap;
454 struct pdc_port_priv *pp = ap->private_data; 454 struct pdc_port_priv *pp = ap->private_data;
455 void __iomem *mmio = ap->host_set->mmio_base; 455 void __iomem *mmio = ap->host_set->mmio_base;
456 struct pdc_host_priv *hpriv = ap->host_set->private_data; 456 struct pdc_host_priv *hpriv = ap->host_set->private_data;
457 void __iomem *dimm_mmio = hpriv->dimm_mmio; 457 void __iomem *dimm_mmio = hpriv->dimm_mmio;
458 unsigned int portno = ap->port_no; 458 unsigned int portno = ap->port_no;
459 unsigned int i, last, idx, total_len = 0, sgt_len; 459 unsigned int i, idx, total_len = 0, sgt_len;
460 u32 *buf = (u32 *) &pp->dimm_buf[PDC_DIMM_HEADER_SZ]; 460 u32 *buf = (u32 *) &pp->dimm_buf[PDC_DIMM_HEADER_SZ];
461 461
462 assert(qc->flags & ATA_QCFLAG_DMAMAP); 462 assert(qc->flags & ATA_QCFLAG_DMAMAP);
@@ -469,12 +469,11 @@ static void pdc20621_dma_prep(struct ata_queued_cmd *qc)
469 /* 469 /*
470 * Build S/G table 470 * Build S/G table
471 */ 471 */
472 last = qc->n_elem;
473 idx = 0; 472 idx = 0;
474 for (i = 0; i < last; i++) { 473 ata_for_each_sg(sg, qc) {
475 buf[idx++] = cpu_to_le32(sg_dma_address(&sg[i])); 474 buf[idx++] = cpu_to_le32(sg_dma_address(sg));
476 buf[idx++] = cpu_to_le32(sg_dma_len(&sg[i])); 475 buf[idx++] = cpu_to_le32(sg_dma_len(sg));
477 total_len += sg_dma_len(&sg[i]); 476 total_len += sg_dma_len(sg);
478 } 477 }
479 buf[idx - 1] |= cpu_to_le32(ATA_PRD_EOT); 478 buf[idx - 1] |= cpu_to_le32(ATA_PRD_EOT);
480 sgt_len = idx * 4; 479 sgt_len = idx * 4;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 00a8a5738858..5f17203520cb 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -155,6 +155,10 @@ enum {
155 ATA_SHIFT_UDMA = 0, 155 ATA_SHIFT_UDMA = 0,
156 ATA_SHIFT_MWDMA = 8, 156 ATA_SHIFT_MWDMA = 8,
157 ATA_SHIFT_PIO = 11, 157 ATA_SHIFT_PIO = 11,
158
159 /* size of buffer to pad xfers ending on unaligned boundaries */
160 ATA_DMA_PAD_SZ = 4,
161 ATA_DMA_PAD_BUF_SZ = ATA_DMA_PAD_SZ * ATA_MAX_QUEUE,
158 162
159 /* Masks for port functions */ 163 /* Masks for port functions */
160 ATA_PORT_PRIMARY = (1 << 0), 164 ATA_PORT_PRIMARY = (1 << 0),
@@ -242,9 +246,12 @@ struct ata_queued_cmd {
242 unsigned long flags; /* ATA_QCFLAG_xxx */ 246 unsigned long flags; /* ATA_QCFLAG_xxx */
243 unsigned int tag; 247 unsigned int tag;
244 unsigned int n_elem; 248 unsigned int n_elem;
249 unsigned int orig_n_elem;
245 250
246 int dma_dir; 251 int dma_dir;
247 252
253 unsigned int pad_len;
254
248 unsigned int nsect; 255 unsigned int nsect;
249 unsigned int cursect; 256 unsigned int cursect;
250 257
@@ -255,9 +262,11 @@ struct ata_queued_cmd {
255 unsigned int cursg_ofs; 262 unsigned int cursg_ofs;
256 263
257 struct scatterlist sgent; 264 struct scatterlist sgent;
265 struct scatterlist pad_sgent;
258 void *buf_virt; 266 void *buf_virt;
259 267
260 struct scatterlist *sg; 268 /* DO NOT iterate over __sg manually, use ata_for_each_sg() */
269 struct scatterlist *__sg;
261 270
262 ata_qc_cb_t complete_fn; 271 ata_qc_cb_t complete_fn;
263 272
@@ -303,6 +312,9 @@ struct ata_port {
303 struct ata_prd *prd; /* our SG list */ 312 struct ata_prd *prd; /* our SG list */
304 dma_addr_t prd_dma; /* and its DMA mapping */ 313 dma_addr_t prd_dma; /* and its DMA mapping */
305 314
315 void *pad; /* array of DMA pad buffers */
316 dma_addr_t pad_dma;
317
306 struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ 318 struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */
307 319
308 u8 ctl; /* cache of ATA control register */ 320 u8 ctl; /* cache of ATA control register */
@@ -507,6 +519,31 @@ extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bit
507#endif /* CONFIG_PCI */ 519#endif /* CONFIG_PCI */
508 520
509 521
522static inline int
523ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc)
524{
525 if (sg == &qc->pad_sgent)
526 return 1;
527 if (qc->pad_len)
528 return 0;
529 if (((sg - qc->__sg) + 1) == qc->n_elem)
530 return 1;
531 return 0;
532}
533
534static inline struct scatterlist *
535ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc)
536{
537 if (sg == &qc->pad_sgent)
538 return NULL;
539 if (++sg - qc->__sg < qc->n_elem)
540 return sg;
541 return qc->pad_len ? &qc->pad_sgent : NULL;
542}
543
544#define ata_for_each_sg(sg, qc) \
545 for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc))
546
510static inline unsigned int ata_tag_valid(unsigned int tag) 547static inline unsigned int ata_tag_valid(unsigned int tag)
511{ 548{
512 return (tag < ATA_MAX_QUEUE) ? 1 : 0; 549 return (tag < ATA_MAX_QUEUE) ? 1 : 0;