diff options
author | Ed Cashin <ecashin@coraid.com> | 2012-10-04 20:16:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-05 14:05:24 -0400 |
commit | 3d5b06051cd5fa82c9a4285f7ce8650a0f0845ff (patch) | |
tree | 66b62def9b84b9d274f2d6b3589635015f98c56d /drivers/block/aoe/aoecmd.c | |
parent | a336d29870f8a1f8e5f10d9f1aa95531c4edeabe (diff) |
aoe: for performance support larger packet payloads
This patch adds the ability to work with large packets composed of a number of
segments, using the scatter gather feature of the block layer (biovecs)
and the network layer (skb frag array). The motivation is the performance
gained by using a packet data payload greater than a page size and by
using the network card's scatter gather feature.
Users of the out-of-tree aoe driver already had these changes, but since
early 2011, they have complained of increased memory utilization and
higher CPU utilization during heavy writes.[1] The commit below appears
related, as it disables scatter gather on non-IP protocols inside the
harmonize_features function, even when the NIC supports sg.
commit f01a5236bd4b140198fbcc550f085e8361fd73fa
Author: Jesse Gross <jesse@nicira.com>
Date: Sun Jan 9 06:23:31 2011 +0000
net offloading: Generalize netif_get_vlan_features().
With that regression in place, transmits always linearize sg AoE packets,
but in-kernel users did not have this patch. Before 2.6.38, though, these
changes were working to allow sg to increase performance.
1. http://www.spinics.net/lists/linux-mm/msg15184.html
Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/aoe/aoecmd.c')
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 138 |
1 files changed, 96 insertions, 42 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 887f68f6d79a..9a58242290c0 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c | |||
@@ -165,7 +165,8 @@ freeframe(struct aoedev *d) | |||
165 | rf = f; | 165 | rf = f; |
166 | continue; | 166 | continue; |
167 | } | 167 | } |
168 | gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0; | 168 | gotone: skb->truesize -= skb->data_len; |
169 | skb_shinfo(skb)->nr_frags = skb->data_len = 0; | ||
169 | skb_trim(skb, 0); | 170 | skb_trim(skb, 0); |
170 | d->tgt = t; | 171 | d->tgt = t; |
171 | ifrotate(*t); | 172 | ifrotate(*t); |
@@ -201,6 +202,24 @@ gotone: skb_shinfo(skb)->nr_frags = skb->data_len = 0; | |||
201 | return NULL; | 202 | return NULL; |
202 | } | 203 | } |
203 | 204 | ||
205 | static void | ||
206 | skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt) | ||
207 | { | ||
208 | int frag = 0; | ||
209 | ulong fcnt; | ||
210 | loop: | ||
211 | fcnt = bv->bv_len - (off - bv->bv_offset); | ||
212 | if (fcnt > cnt) | ||
213 | fcnt = cnt; | ||
214 | skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt); | ||
215 | cnt -= fcnt; | ||
216 | if (cnt <= 0) | ||
217 | return; | ||
218 | bv++; | ||
219 | off = bv->bv_offset; | ||
220 | goto loop; | ||
221 | } | ||
222 | |||
204 | static int | 223 | static int |
205 | aoecmd_ata_rw(struct aoedev *d) | 224 | aoecmd_ata_rw(struct aoedev *d) |
206 | { | 225 | { |
@@ -211,7 +230,7 @@ aoecmd_ata_rw(struct aoedev *d) | |||
211 | struct bio_vec *bv; | 230 | struct bio_vec *bv; |
212 | struct aoetgt *t; | 231 | struct aoetgt *t; |
213 | struct sk_buff *skb; | 232 | struct sk_buff *skb; |
214 | ulong bcnt; | 233 | ulong bcnt, fbcnt; |
215 | char writebit, extbit; | 234 | char writebit, extbit; |
216 | 235 | ||
217 | writebit = 0x10; | 236 | writebit = 0x10; |
@@ -226,8 +245,28 @@ aoecmd_ata_rw(struct aoedev *d) | |||
226 | bcnt = t->ifp->maxbcnt; | 245 | bcnt = t->ifp->maxbcnt; |
227 | if (bcnt == 0) | 246 | if (bcnt == 0) |
228 | bcnt = DEFAULTBCNT; | 247 | bcnt = DEFAULTBCNT; |
229 | if (bcnt > buf->bv_resid) | 248 | if (bcnt > buf->resid) |
230 | bcnt = buf->bv_resid; | 249 | bcnt = buf->resid; |
250 | fbcnt = bcnt; | ||
251 | f->bv = buf->bv; | ||
252 | f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid); | ||
253 | do { | ||
254 | if (fbcnt < buf->bv_resid) { | ||
255 | buf->bv_resid -= fbcnt; | ||
256 | buf->resid -= fbcnt; | ||
257 | break; | ||
258 | } | ||
259 | fbcnt -= buf->bv_resid; | ||
260 | buf->resid -= buf->bv_resid; | ||
261 | if (buf->resid == 0) { | ||
262 | d->inprocess = NULL; | ||
263 | break; | ||
264 | } | ||
265 | buf->bv++; | ||
266 | buf->bv_resid = buf->bv->bv_len; | ||
267 | WARN_ON(buf->bv_resid == 0); | ||
268 | } while (fbcnt); | ||
269 | |||
231 | /* initialize the headers & frame */ | 270 | /* initialize the headers & frame */ |
232 | skb = f->skb; | 271 | skb = f->skb; |
233 | h = (struct aoe_hdr *) skb_mac_header(skb); | 272 | h = (struct aoe_hdr *) skb_mac_header(skb); |
@@ -238,7 +277,6 @@ aoecmd_ata_rw(struct aoedev *d) | |||
238 | t->nout++; | 277 | t->nout++; |
239 | f->waited = 0; | 278 | f->waited = 0; |
240 | f->buf = buf; | 279 | f->buf = buf; |
241 | f->bufaddr = page_address(bv->bv_page) + buf->bv_off; | ||
242 | f->bcnt = bcnt; | 280 | f->bcnt = bcnt; |
243 | f->lba = buf->sector; | 281 | f->lba = buf->sector; |
244 | 282 | ||
@@ -253,10 +291,11 @@ aoecmd_ata_rw(struct aoedev *d) | |||
253 | ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ | 291 | ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ |
254 | } | 292 | } |
255 | if (bio_data_dir(buf->bio) == WRITE) { | 293 | if (bio_data_dir(buf->bio) == WRITE) { |
256 | skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt); | 294 | skb_fillup(skb, f->bv, f->bv_off, bcnt); |
257 | ah->aflags |= AOEAFL_WRITE; | 295 | ah->aflags |= AOEAFL_WRITE; |
258 | skb->len += bcnt; | 296 | skb->len += bcnt; |
259 | skb->data_len = bcnt; | 297 | skb->data_len = bcnt; |
298 | skb->truesize += bcnt; | ||
260 | t->wpkts++; | 299 | t->wpkts++; |
261 | } else { | 300 | } else { |
262 | t->rpkts++; | 301 | t->rpkts++; |
@@ -267,18 +306,7 @@ aoecmd_ata_rw(struct aoedev *d) | |||
267 | 306 | ||
268 | /* mark all tracking fields and load out */ | 307 | /* mark all tracking fields and load out */ |
269 | buf->nframesout += 1; | 308 | buf->nframesout += 1; |
270 | buf->bv_off += bcnt; | ||
271 | buf->bv_resid -= bcnt; | ||
272 | buf->resid -= bcnt; | ||
273 | buf->sector += bcnt >> 9; | 309 | buf->sector += bcnt >> 9; |
274 | if (buf->resid == 0) { | ||
275 | d->inprocess = NULL; | ||
276 | } else if (buf->bv_resid == 0) { | ||
277 | buf->bv = ++bv; | ||
278 | buf->bv_resid = bv->bv_len; | ||
279 | WARN_ON(buf->bv_resid == 0); | ||
280 | buf->bv_off = bv->bv_offset; | ||
281 | } | ||
282 | 310 | ||
283 | skb->dev = t->ifp->nd; | 311 | skb->dev = t->ifp->nd; |
284 | skb = skb_clone(skb, GFP_ATOMIC); | 312 | skb = skb_clone(skb, GFP_ATOMIC); |
@@ -365,14 +393,12 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f) | |||
365 | put_lba(ah, f->lba); | 393 | put_lba(ah, f->lba); |
366 | 394 | ||
367 | n = f->bcnt; | 395 | n = f->bcnt; |
368 | if (n > DEFAULTBCNT) | ||
369 | n = DEFAULTBCNT; | ||
370 | ah->scnt = n >> 9; | 396 | ah->scnt = n >> 9; |
371 | if (ah->aflags & AOEAFL_WRITE) { | 397 | if (ah->aflags & AOEAFL_WRITE) { |
372 | skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), | 398 | skb_fillup(skb, f->bv, f->bv_off, n); |
373 | offset_in_page(f->bufaddr), n); | ||
374 | skb->len = sizeof *h + sizeof *ah + n; | 399 | skb->len = sizeof *h + sizeof *ah + n; |
375 | skb->data_len = n; | 400 | skb->data_len = n; |
401 | skb->truesize += n; | ||
376 | } | 402 | } |
377 | } | 403 | } |
378 | skb->dev = t->ifp->nd; | 404 | skb->dev = t->ifp->nd; |
@@ -531,20 +557,6 @@ rexmit_timer(ulong vp) | |||
531 | ejectif(t, ifp); | 557 | ejectif(t, ifp); |
532 | ifp = NULL; | 558 | ifp = NULL; |
533 | } | 559 | } |
534 | |||
535 | if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512 | ||
536 | && ifp && ++ifp->lostjumbo > (t->nframes << 1) | ||
537 | && ifp->maxbcnt != DEFAULTBCNT) { | ||
538 | printk(KERN_INFO | ||
539 | "aoe: e%ld.%d: " | ||
540 | "too many lost jumbo on " | ||
541 | "%s:%pm - " | ||
542 | "falling back to %d frames.\n", | ||
543 | d->aoemajor, d->aoeminor, | ||
544 | ifp->nd->name, t->addr, | ||
545 | DEFAULTBCNT); | ||
546 | ifp->maxbcnt = 0; | ||
547 | } | ||
548 | resend(d, t, f); | 560 | resend(d, t, f); |
549 | } | 561 | } |
550 | 562 | ||
@@ -737,6 +749,45 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector | |||
737 | part_stat_unlock(); | 749 | part_stat_unlock(); |
738 | } | 750 | } |
739 | 751 | ||
752 | static void | ||
753 | bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, ulong cnt) | ||
754 | { | ||
755 | ulong fcnt; | ||
756 | char *p; | ||
757 | int soff = 0; | ||
758 | loop: | ||
759 | fcnt = bv->bv_len - (off - bv->bv_offset); | ||
760 | if (fcnt > cnt) | ||
761 | fcnt = cnt; | ||
762 | p = page_address(bv->bv_page) + off; | ||
763 | skb_copy_bits(skb, soff, p, fcnt); | ||
764 | soff += fcnt; | ||
765 | cnt -= fcnt; | ||
766 | if (cnt <= 0) | ||
767 | return; | ||
768 | bv++; | ||
769 | off = bv->bv_offset; | ||
770 | goto loop; | ||
771 | } | ||
772 | |||
773 | static void | ||
774 | fadvance(struct frame *f, ulong cnt) | ||
775 | { | ||
776 | ulong fcnt; | ||
777 | |||
778 | f->lba += cnt >> 9; | ||
779 | loop: | ||
780 | fcnt = f->bv->bv_len - (f->bv_off - f->bv->bv_offset); | ||
781 | if (fcnt > cnt) { | ||
782 | f->bv_off += cnt; | ||
783 | return; | ||
784 | } | ||
785 | cnt -= fcnt; | ||
786 | f->bv++; | ||
787 | f->bv_off = f->bv->bv_offset; | ||
788 | goto loop; | ||
789 | } | ||
790 | |||
740 | void | 791 | void |
741 | aoecmd_ata_rsp(struct sk_buff *skb) | 792 | aoecmd_ata_rsp(struct sk_buff *skb) |
742 | { | 793 | { |
@@ -754,6 +805,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
754 | u16 aoemajor; | 805 | u16 aoemajor; |
755 | 806 | ||
756 | hin = (struct aoe_hdr *) skb_mac_header(skb); | 807 | hin = (struct aoe_hdr *) skb_mac_header(skb); |
808 | skb_pull(skb, sizeof(*hin)); | ||
757 | aoemajor = get_unaligned_be16(&hin->major); | 809 | aoemajor = get_unaligned_be16(&hin->major); |
758 | d = aoedev_by_aoeaddr(aoemajor, hin->minor); | 810 | d = aoedev_by_aoeaddr(aoemajor, hin->minor); |
759 | if (d == NULL) { | 811 | if (d == NULL) { |
@@ -791,7 +843,8 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
791 | 843 | ||
792 | calc_rttavg(d, tsince(f->tag)); | 844 | calc_rttavg(d, tsince(f->tag)); |
793 | 845 | ||
794 | ahin = (struct aoe_atahdr *) (hin+1); | 846 | ahin = (struct aoe_atahdr *) skb->data; |
847 | skb_pull(skb, sizeof(*ahin)); | ||
795 | hout = (struct aoe_hdr *) skb_mac_header(f->skb); | 848 | hout = (struct aoe_hdr *) skb_mac_header(f->skb); |
796 | ahout = (struct aoe_atahdr *) (hout+1); | 849 | ahout = (struct aoe_atahdr *) (hout+1); |
797 | buf = f->buf; | 850 | buf = f->buf; |
@@ -810,7 +863,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
810 | switch (ahout->cmdstat) { | 863 | switch (ahout->cmdstat) { |
811 | case ATA_CMD_PIO_READ: | 864 | case ATA_CMD_PIO_READ: |
812 | case ATA_CMD_PIO_READ_EXT: | 865 | case ATA_CMD_PIO_READ_EXT: |
813 | if (skb->len - sizeof *hin - sizeof *ahin < n) { | 866 | if (skb->len < n) { |
814 | printk(KERN_ERR | 867 | printk(KERN_ERR |
815 | "aoe: %s. skb->len=%d need=%ld\n", | 868 | "aoe: %s. skb->len=%d need=%ld\n", |
816 | "runt data size in read", skb->len, n); | 869 | "runt data size in read", skb->len, n); |
@@ -818,7 +871,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
818 | spin_unlock_irqrestore(&d->lock, flags); | 871 | spin_unlock_irqrestore(&d->lock, flags); |
819 | return; | 872 | return; |
820 | } | 873 | } |
821 | memcpy(f->bufaddr, ahin+1, n); | 874 | bvcpy(f->bv, f->bv_off, skb, n); |
822 | case ATA_CMD_PIO_WRITE: | 875 | case ATA_CMD_PIO_WRITE: |
823 | case ATA_CMD_PIO_WRITE_EXT: | 876 | case ATA_CMD_PIO_WRITE_EXT: |
824 | ifp = getif(t, skb->dev); | 877 | ifp = getif(t, skb->dev); |
@@ -828,21 +881,22 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
828 | ifp->lostjumbo = 0; | 881 | ifp->lostjumbo = 0; |
829 | } | 882 | } |
830 | if (f->bcnt -= n) { | 883 | if (f->bcnt -= n) { |
831 | f->lba += n >> 9; | 884 | fadvance(f, n); |
832 | f->bufaddr += n; | ||
833 | resend(d, t, f); | 885 | resend(d, t, f); |
834 | goto xmit; | 886 | goto xmit; |
835 | } | 887 | } |
836 | break; | 888 | break; |
837 | case ATA_CMD_ID_ATA: | 889 | case ATA_CMD_ID_ATA: |
838 | if (skb->len - sizeof *hin - sizeof *ahin < 512) { | 890 | if (skb->len < 512) { |
839 | printk(KERN_INFO | 891 | printk(KERN_INFO |
840 | "aoe: runt data size in ataid. skb->len=%d\n", | 892 | "aoe: runt data size in ataid. skb->len=%d\n", |
841 | skb->len); | 893 | skb->len); |
842 | spin_unlock_irqrestore(&d->lock, flags); | 894 | spin_unlock_irqrestore(&d->lock, flags); |
843 | return; | 895 | return; |
844 | } | 896 | } |
845 | ataid_complete(d, t, (char *) (ahin+1)); | 897 | if (skb_linearize(skb)) |
898 | break; | ||
899 | ataid_complete(d, t, skb->data); | ||
846 | break; | 900 | break; |
847 | default: | 901 | default: |
848 | printk(KERN_INFO | 902 | printk(KERN_INFO |