about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/block/aoe/aoecmd.c
diff options
context:
space:
mode:
author: Ed Cashin <ecashin@coraid.com> 2012-10-04 20:16:20 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-10-05 14:05:24 -0400
commit: 3d5b06051cd5fa82c9a4285f7ce8650a0f0845ff (patch)
tree: 66b62def9b84b9d274f2d6b3589635015f98c56d /drivers/block/aoe/aoecmd.c
parent: a336d29870f8a1f8e5f10d9f1aa95531c4edeabe (diff)
aoe: for performance support larger packet payloads
This patch adds the ability to work with large packets composed of a number of segments, using the scatter gather feature of the block layer (biovecs) and the network layer (skb frag array). The motivation is the performance gained by using a packet data payload greater than a page size and by using the network card's scatter gather feature.

Users of the out-of-tree aoe driver already had these changes, but since early 2011, they have complained of increased memory utilization and higher CPU utilization during heavy writes.[1] The commit below appears related, as it disables scatter gather on non-IP protocols inside the harmonize_features function, even when the NIC supports sg.

    commit f01a5236bd4b140198fbcc550f085e8361fd73fa
    Author: Jesse Gross <jesse@nicira.com>
    Date:   Sun Jan 9 06:23:31 2011 +0000

        net offloading: Generalize netif_get_vlan_features().

With that regression in place, transmits always linearize sg AoE packets, but in-kernel users did not have this patch. Before 2.6.38, though, these changes were working to allow sg to increase performance.

1. http://www.spinics.net/lists/linux-mm/msg15184.html

Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/aoe/aoecmd.c')
-rw-r--r-- drivers/block/aoe/aoecmd.c 138
1 files changed, 96 insertions, 42 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 887f68f6d79a..9a58242290c0 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -165,7 +165,8 @@ freeframe(struct aoedev *d)
165 rf = f; 165 rf = f;
166 continue; 166 continue;
167 } 167 }
168gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0; 168gotone: skb->truesize -= skb->data_len;
169 skb_shinfo(skb)->nr_frags = skb->data_len = 0;
169 skb_trim(skb, 0); 170 skb_trim(skb, 0);
170 d->tgt = t; 171 d->tgt = t;
171 ifrotate(*t); 172 ifrotate(*t);
@@ -201,6 +202,24 @@ gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0;
201 return NULL; 202 return NULL;
202} 203}
203 204
205static void
206skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
207{
208 int frag = 0;
209 ulong fcnt;
210loop:
211 fcnt = bv->bv_len - (off - bv->bv_offset);
212 if (fcnt > cnt)
213 fcnt = cnt;
214 skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
215 cnt -= fcnt;
216 if (cnt <= 0)
217 return;
218 bv++;
219 off = bv->bv_offset;
220 goto loop;
221}
222
204static int 223static int
205aoecmd_ata_rw(struct aoedev *d) 224aoecmd_ata_rw(struct aoedev *d)
206{ 225{
@@ -211,7 +230,7 @@ aoecmd_ata_rw(struct aoedev *d)
211 struct bio_vec *bv; 230 struct bio_vec *bv;
212 struct aoetgt *t; 231 struct aoetgt *t;
213 struct sk_buff *skb; 232 struct sk_buff *skb;
214 ulong bcnt; 233 ulong bcnt, fbcnt;
215 char writebit, extbit; 234 char writebit, extbit;
216 235
217 writebit = 0x10; 236 writebit = 0x10;
@@ -226,8 +245,28 @@ aoecmd_ata_rw(struct aoedev *d)
226 bcnt = t->ifp->maxbcnt; 245 bcnt = t->ifp->maxbcnt;
227 if (bcnt == 0) 246 if (bcnt == 0)
228 bcnt = DEFAULTBCNT; 247 bcnt = DEFAULTBCNT;
229 if (bcnt > buf->bv_resid) 248 if (bcnt > buf->resid)
230 bcnt = buf->bv_resid; 249 bcnt = buf->resid;
250 fbcnt = bcnt;
251 f->bv = buf->bv;
252 f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
253 do {
254 if (fbcnt < buf->bv_resid) {
255 buf->bv_resid -= fbcnt;
256 buf->resid -= fbcnt;
257 break;
258 }
259 fbcnt -= buf->bv_resid;
260 buf->resid -= buf->bv_resid;
261 if (buf->resid == 0) {
262 d->inprocess = NULL;
263 break;
264 }
265 buf->bv++;
266 buf->bv_resid = buf->bv->bv_len;
267 WARN_ON(buf->bv_resid == 0);
268 } while (fbcnt);
269
231 /* initialize the headers & frame */ 270 /* initialize the headers & frame */
232 skb = f->skb; 271 skb = f->skb;
233 h = (struct aoe_hdr *) skb_mac_header(skb); 272 h = (struct aoe_hdr *) skb_mac_header(skb);
@@ -238,7 +277,6 @@ aoecmd_ata_rw(struct aoedev *d)
238 t->nout++; 277 t->nout++;
239 f->waited = 0; 278 f->waited = 0;
240 f->buf = buf; 279 f->buf = buf;
241 f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
242 f->bcnt = bcnt; 280 f->bcnt = bcnt;
243 f->lba = buf->sector; 281 f->lba = buf->sector;
244 282
@@ -253,10 +291,11 @@ aoecmd_ata_rw(struct aoedev *d)
253 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ 291 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
254 } 292 }
255 if (bio_data_dir(buf->bio) == WRITE) { 293 if (bio_data_dir(buf->bio) == WRITE) {
256 skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt); 294 skb_fillup(skb, f->bv, f->bv_off, bcnt);
257 ah->aflags |= AOEAFL_WRITE; 295 ah->aflags |= AOEAFL_WRITE;
258 skb->len += bcnt; 296 skb->len += bcnt;
259 skb->data_len = bcnt; 297 skb->data_len = bcnt;
298 skb->truesize += bcnt;
260 t->wpkts++; 299 t->wpkts++;
261 } else { 300 } else {
262 t->rpkts++; 301 t->rpkts++;
@@ -267,18 +306,7 @@ aoecmd_ata_rw(struct aoedev *d)
267 306
268 /* mark all tracking fields and load out */ 307 /* mark all tracking fields and load out */
269 buf->nframesout += 1; 308 buf->nframesout += 1;
270 buf->bv_off += bcnt;
271 buf->bv_resid -= bcnt;
272 buf->resid -= bcnt;
273 buf->sector += bcnt >> 9; 309 buf->sector += bcnt >> 9;
274 if (buf->resid == 0) {
275 d->inprocess = NULL;
276 } else if (buf->bv_resid == 0) {
277 buf->bv = ++bv;
278 buf->bv_resid = bv->bv_len;
279 WARN_ON(buf->bv_resid == 0);
280 buf->bv_off = bv->bv_offset;
281 }
282 310
283 skb->dev = t->ifp->nd; 311 skb->dev = t->ifp->nd;
284 skb = skb_clone(skb, GFP_ATOMIC); 312 skb = skb_clone(skb, GFP_ATOMIC);
@@ -365,14 +393,12 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
365 put_lba(ah, f->lba); 393 put_lba(ah, f->lba);
366 394
367 n = f->bcnt; 395 n = f->bcnt;
368 if (n > DEFAULTBCNT)
369 n = DEFAULTBCNT;
370 ah->scnt = n >> 9; 396 ah->scnt = n >> 9;
371 if (ah->aflags & AOEAFL_WRITE) { 397 if (ah->aflags & AOEAFL_WRITE) {
372 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), 398 skb_fillup(skb, f->bv, f->bv_off, n);
373 offset_in_page(f->bufaddr), n);
374 skb->len = sizeof *h + sizeof *ah + n; 399 skb->len = sizeof *h + sizeof *ah + n;
375 skb->data_len = n; 400 skb->data_len = n;
401 skb->truesize += n;
376 } 402 }
377 } 403 }
378 skb->dev = t->ifp->nd; 404 skb->dev = t->ifp->nd;
@@ -531,20 +557,6 @@ rexmit_timer(ulong vp)
531 ejectif(t, ifp); 557 ejectif(t, ifp);
532 ifp = NULL; 558 ifp = NULL;
533 } 559 }
534
535 if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
536 && ifp && ++ifp->lostjumbo > (t->nframes << 1)
537 && ifp->maxbcnt != DEFAULTBCNT) {
538 printk(KERN_INFO
539 "aoe: e%ld.%d: "
540 "too many lost jumbo on "
541 "%s:%pm - "
542 "falling back to %d frames.\n",
543 d->aoemajor, d->aoeminor,
544 ifp->nd->name, t->addr,
545 DEFAULTBCNT);
546 ifp->maxbcnt = 0;
547 }
548 resend(d, t, f); 560 resend(d, t, f);
549 } 561 }
550 562
@@ -737,6 +749,45 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
737 part_stat_unlock(); 749 part_stat_unlock();
738} 750}
739 751
752static void
753bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, ulong cnt)
754{
755 ulong fcnt;
756 char *p;
757 int soff = 0;
758loop:
759 fcnt = bv->bv_len - (off - bv->bv_offset);
760 if (fcnt > cnt)
761 fcnt = cnt;
762 p = page_address(bv->bv_page) + off;
763 skb_copy_bits(skb, soff, p, fcnt);
764 soff += fcnt;
765 cnt -= fcnt;
766 if (cnt <= 0)
767 return;
768 bv++;
769 off = bv->bv_offset;
770 goto loop;
771}
772
773static void
774fadvance(struct frame *f, ulong cnt)
775{
776 ulong fcnt;
777
778 f->lba += cnt >> 9;
779loop:
780 fcnt = f->bv->bv_len - (f->bv_off - f->bv->bv_offset);
781 if (fcnt > cnt) {
782 f->bv_off += cnt;
783 return;
784 }
785 cnt -= fcnt;
786 f->bv++;
787 f->bv_off = f->bv->bv_offset;
788 goto loop;
789}
790
740void 791void
741aoecmd_ata_rsp(struct sk_buff *skb) 792aoecmd_ata_rsp(struct sk_buff *skb)
742{ 793{
@@ -754,6 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
754 u16 aoemajor; 805 u16 aoemajor;
755 806
756 hin = (struct aoe_hdr *) skb_mac_header(skb); 807 hin = (struct aoe_hdr *) skb_mac_header(skb);
808 skb_pull(skb, sizeof(*hin));
757 aoemajor = get_unaligned_be16(&hin->major); 809 aoemajor = get_unaligned_be16(&hin->major);
758 d = aoedev_by_aoeaddr(aoemajor, hin->minor); 810 d = aoedev_by_aoeaddr(aoemajor, hin->minor);
759 if (d == NULL) { 811 if (d == NULL) {
@@ -791,7 +843,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
791 843
792 calc_rttavg(d, tsince(f->tag)); 844 calc_rttavg(d, tsince(f->tag));
793 845
794 ahin = (struct aoe_atahdr *) (hin+1); 846 ahin = (struct aoe_atahdr *) skb->data;
847 skb_pull(skb, sizeof(*ahin));
795 hout = (struct aoe_hdr *) skb_mac_header(f->skb); 848 hout = (struct aoe_hdr *) skb_mac_header(f->skb);
796 ahout = (struct aoe_atahdr *) (hout+1); 849 ahout = (struct aoe_atahdr *) (hout+1);
797 buf = f->buf; 850 buf = f->buf;
@@ -810,7 +863,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
810 switch (ahout->cmdstat) { 863 switch (ahout->cmdstat) {
811 case ATA_CMD_PIO_READ: 864 case ATA_CMD_PIO_READ:
812 case ATA_CMD_PIO_READ_EXT: 865 case ATA_CMD_PIO_READ_EXT:
813 if (skb->len - sizeof *hin - sizeof *ahin < n) { 866 if (skb->len < n) {
814 printk(KERN_ERR 867 printk(KERN_ERR
815 "aoe: %s. skb->len=%d need=%ld\n", 868 "aoe: %s. skb->len=%d need=%ld\n",
816 "runt data size in read", skb->len, n); 869 "runt data size in read", skb->len, n);
@@ -818,7 +871,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
818 spin_unlock_irqrestore(&d->lock, flags); 871 spin_unlock_irqrestore(&d->lock, flags);
819 return; 872 return;
820 } 873 }
821 memcpy(f->bufaddr, ahin+1, n); 874 bvcpy(f->bv, f->bv_off, skb, n);
822 case ATA_CMD_PIO_WRITE: 875 case ATA_CMD_PIO_WRITE:
823 case ATA_CMD_PIO_WRITE_EXT: 876 case ATA_CMD_PIO_WRITE_EXT:
824 ifp = getif(t, skb->dev); 877 ifp = getif(t, skb->dev);
@@ -828,21 +881,22 @@ aoecmd_ata_rsp(struct sk_buff *skb)
828 ifp->lostjumbo = 0; 881 ifp->lostjumbo = 0;
829 } 882 }
830 if (f->bcnt -= n) { 883 if (f->bcnt -= n) {
831 f->lba += n >> 9; 884 fadvance(f, n);
832 f->bufaddr += n;
833 resend(d, t, f); 885 resend(d, t, f);
834 goto xmit; 886 goto xmit;
835 } 887 }
836 break; 888 break;
837 case ATA_CMD_ID_ATA: 889 case ATA_CMD_ID_ATA:
838 if (skb->len - sizeof *hin - sizeof *ahin < 512) { 890 if (skb->len < 512) {
839 printk(KERN_INFO 891 printk(KERN_INFO
840 "aoe: runt data size in ataid. skb->len=%d\n", 892 "aoe: runt data size in ataid. skb->len=%d\n",
841 skb->len); 893 skb->len);
842 spin_unlock_irqrestore(&d->lock, flags); 894 spin_unlock_irqrestore(&d->lock, flags);
843 return; 895 return;
844 } 896 }
845 ataid_complete(d, t, (char *) (ahin+1)); 897 if (skb_linearize(skb))
898 break;
899 ataid_complete(d, t, skb->data);
846 break; 900 break;
847 default: 901 default:
848 printk(KERN_INFO 902 printk(KERN_INFO