aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/aoe/aoecmd.c
diff options
context:
space:
mode:
author: Ed Cashin <ecashin@coraid.com> 2012-12-17 19:03:43 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 20:15:24 -0500
commit: 3a0c40d2d29e476ece583540e4f11276e0f36d5f (patch)
tree: 6f57de53c253bf588040611c12e5cb9e84326ba6 /drivers/block/aoe/aoecmd.c
parent: 667be1e757f5684576d01d7402907a2489b1402f (diff)
aoe: improve network congestion handling
The aoe driver already had some congestion handling, but it was limited in its ability to cope with the kind of congestion that can arise on more complex networks such as those involving paths through multiple ethernet switches. Some of the lessons from TCP's history of development can be applied to improving the congestion control and avoidance on AoE storage networks. These changes use familiar concepts from Van Jacobson's "Congestion Avoidance and Control" paper from '88, without adding significant overhead. This patch depends on an upcoming patch that covers the failover case when AoE commands being retransmitted are transferred from one retransmit queue to another. Another upcoming patch increases the timing accuracy. Signed-off-by: Ed Cashin <ecashin@coraid.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/aoe/aoecmd.c')
-rw-r--r-- drivers/block/aoe/aoecmd.c 173
1 files changed, 106 insertions, 67 deletions
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index c4ff70b61e7e..f849fa2471ca 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -59,6 +59,23 @@ new_skb(ulong len)
59} 59}
60 60
61static struct frame * 61static struct frame *
62getframe_deferred(struct aoedev *d, u32 tag)
63{
64 struct list_head *head, *pos, *nx;
65 struct frame *f;
66
67 head = &d->rexmitq;
68 list_for_each_safe(pos, nx, head) {
69 f = list_entry(pos, struct frame, head);
70 if (f->tag == tag) {
71 list_del(pos);
72 return f;
73 }
74 }
75 return NULL;
76}
77
78static struct frame *
62getframe(struct aoedev *d, u32 tag) 79getframe(struct aoedev *d, u32 tag)
63{ 80{
64 struct frame *f; 81 struct frame *f;
@@ -553,10 +570,29 @@ sthtith(struct aoedev *d)
553} 570}
554 571
555static void 572static void
573rexmit_deferred(struct aoedev *d)
574{
575 struct aoetgt *t;
576 struct frame *f;
577 struct list_head *pos, *nx, *head;
578
579 head = &d->rexmitq;
580 list_for_each_safe(pos, nx, head) {
581 f = list_entry(pos, struct frame, head);
582 t = f->t;
583 if (t->nout >= t->maxout)
584 continue;
585 list_del(pos);
586 t->nout++;
587 resend(d, f);
588 }
589}
590
591static void
556rexmit_timer(ulong vp) 592rexmit_timer(ulong vp)
557{ 593{
558 struct aoedev *d; 594 struct aoedev *d;
559 struct aoetgt *t, **tt, **te; 595 struct aoetgt *t;
560 struct aoeif *ifp; 596 struct aoeif *ifp;
561 struct frame *f; 597 struct frame *f;
562 struct list_head *head, *pos, *nx; 598 struct list_head *head, *pos, *nx;
@@ -567,9 +603,11 @@ rexmit_timer(ulong vp)
567 603
568 d = (struct aoedev *) vp; 604 d = (struct aoedev *) vp;
569 605
570 /* timeout is always ~150% of the moving average */ 606 /* timeout based on observed timings and variations */
571 timeout = d->rttavg; 607 timeout = 2 * d->rttavg >> RTTSCALE;
572 timeout += timeout >> 1; 608 timeout += 8 * d->rttdev >> RTTDSCALE;
609 if (timeout == 0)
610 timeout = 1;
573 611
574 spin_lock_irqsave(&d->lock, flags); 612 spin_lock_irqsave(&d->lock, flags);
575 613
@@ -589,29 +627,12 @@ rexmit_timer(ulong vp)
589 list_move_tail(pos, &flist); 627 list_move_tail(pos, &flist);
590 } 628 }
591 } 629 }
592 /* window check */
593 tt = d->targets;
594 te = tt + d->ntargets;
595 for (; tt < te && (t = *tt); tt++) {
596 if (t->nout == t->maxout
597 && t->maxout < t->nframes
598 && (jiffies - t->lastwadj)/HZ > 10) {
599 t->maxout++;
600 t->lastwadj = jiffies;
601 }
602 }
603
604 if (!list_empty(&flist)) { /* retransmissions necessary */
605 n = d->rttavg <<= 1;
606 if (n > MAXTIMER)
607 d->rttavg = MAXTIMER;
608 }
609 630
610 /* process expired frames */ 631 /* process expired frames */
611 while (!list_empty(&flist)) { 632 while (!list_empty(&flist)) {
612 pos = flist.next; 633 pos = flist.next;
613 f = list_entry(pos, struct frame, head); 634 f = list_entry(pos, struct frame, head);
614 n = f->waited += timeout; 635 n = f->waited += tsince(f->tag);
615 n /= HZ; 636 n /= HZ;
616 if (n > aoe_deadsecs) { 637 if (n > aoe_deadsecs) {
617 /* Waited too long. Device failure. 638 /* Waited too long. Device failure.
@@ -620,18 +641,16 @@ rexmit_timer(ulong vp)
620 */ 641 */
621 list_splice(&flist, &d->factive[0]); 642 list_splice(&flist, &d->factive[0]);
622 aoedev_downdev(d); 643 aoedev_downdev(d);
623 break; 644 goto out;
624 } 645 }
625 list_del(pos);
626 646
627 t = f->t; 647 t = f->t;
628 if (n > aoe_deadsecs/2) 648 if (n > aoe_deadsecs/2)
629 d->htgt = t; /* see if another target can help */ 649 d->htgt = t; /* see if another target can help */
630 650
631 if (t->nout == t->maxout) { 651 if (t->maxout != 1) {
632 if (t->maxout > 1) 652 t->ssthresh = t->maxout / 2;
633 t->maxout--; 653 t->maxout = 1;
634 t->lastwadj = jiffies;
635 } 654 }
636 655
637 ifp = getif(t, f->skb->dev); 656 ifp = getif(t, f->skb->dev);
@@ -640,9 +659,12 @@ rexmit_timer(ulong vp)
640 ejectif(t, ifp); 659 ejectif(t, ifp);
641 ifp = NULL; 660 ifp = NULL;
642 } 661 }
643 resend(d, f); 662 list_move_tail(pos, &d->rexmitq);
663 t->nout--;
644 } 664 }
665 rexmit_deferred(d);
645 666
667out:
646 if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { 668 if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
647 d->flags &= ~DEVFL_KICKME; 669 d->flags &= ~DEVFL_KICKME;
648 d->blkq->request_fn(d->blkq); 670 d->blkq->request_fn(d->blkq);
@@ -766,6 +788,7 @@ aoecmd_work(struct aoedev *d)
766{ 788{
767 if (d->htgt && !sthtith(d)) 789 if (d->htgt && !sthtith(d))
768 return; 790 return;
791 rexmit_deferred(d);
769 while (aoecmd_ata_rw(d)) 792 while (aoecmd_ata_rw(d))
770 ; 793 ;
771} 794}
@@ -868,26 +891,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
868} 891}
869 892
870static void 893static void
871calc_rttavg(struct aoedev *d, int rtt) 894calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
872{ 895{
873 register long n; 896 register long n;
874 897
875 n = rtt; 898 n = rtt;
876 if (n < 0) { 899
877 n = -rtt; 900 /* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
878 if (n < MINTIMER) 901 n -= d->rttavg >> RTTSCALE;
879 n = MINTIMER; 902 d->rttavg += n;
880 else if (n > MAXTIMER) 903 if (n < 0)
881 n = MAXTIMER; 904 n = -n;
882 d->mintimer += (n - d->mintimer) >> 1; 905 n -= d->rttdev >> RTTDSCALE;
883 } else if (n < d->mintimer) 906 d->rttdev += n;
884 n = d->mintimer; 907
885 else if (n > MAXTIMER) 908 if (!t || t->maxout >= t->nframes)
886 n = MAXTIMER; 909 return;
887 910 if (t->maxout < t->ssthresh)
888 /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */ 911 t->maxout += 1;
889 n -= d->rttavg; 912 else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
890 d->rttavg += n >> 2; 913 t->maxout += 1;
914 t->next_cwnd = t->maxout;
915 }
891} 916}
892 917
893static struct aoetgt * 918static struct aoetgt *
@@ -1147,7 +1172,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
1147 struct aoedev *d; 1172 struct aoedev *d;
1148 struct aoe_hdr *h; 1173 struct aoe_hdr *h;
1149 struct frame *f; 1174 struct frame *f;
1150 struct aoetgt *t;
1151 u32 n; 1175 u32 n;
1152 ulong flags; 1176 ulong flags;
1153 char ebuf[128]; 1177 char ebuf[128];
@@ -1168,23 +1192,28 @@ aoecmd_ata_rsp(struct sk_buff *skb)
1168 1192
1169 n = be32_to_cpu(get_unaligned(&h->tag)); 1193 n = be32_to_cpu(get_unaligned(&h->tag));
1170 f = getframe(d, n); 1194 f = getframe(d, n);
1171 if (f == NULL) { 1195 if (f) {
1172 calc_rttavg(d, -tsince(n)); 1196 calc_rttavg(d, f->t, tsince(n));
1173 spin_unlock_irqrestore(&d->lock, flags); 1197 f->t->nout--;
1174 aoedev_put(d); 1198 } else {
1175 snprintf(ebuf, sizeof ebuf, 1199 f = getframe_deferred(d, n);
1176 "%15s e%d.%d tag=%08x@%08lx\n", 1200 if (f) {
1177 "unexpected rsp", 1201 calc_rttavg(d, NULL, tsince(n));
1178 get_unaligned_be16(&h->major), 1202 } else {
1179 h->minor, 1203 calc_rttavg(d, NULL, tsince(n));
1180 get_unaligned_be32(&h->tag), 1204 spin_unlock_irqrestore(&d->lock, flags);
1181 jiffies); 1205 aoedev_put(d);
1182 aoechr_error(ebuf); 1206 snprintf(ebuf, sizeof(ebuf),
1183 return skb; 1207 "%15s e%d.%d tag=%08x@%08lx\n",
1208 "unexpected rsp",
1209 get_unaligned_be16(&h->major),
1210 h->minor,
1211 get_unaligned_be32(&h->tag),
1212 jiffies);
1213 aoechr_error(ebuf);
1214 return skb;
1215 }
1184 } 1216 }
1185 t = f->t;
1186 calc_rttavg(d, tsince(f->tag));
1187 t->nout--;
1188 aoecmd_work(d); 1217 aoecmd_work(d);
1189 1218
1190 spin_unlock_irqrestore(&d->lock, flags); 1219 spin_unlock_irqrestore(&d->lock, flags);
@@ -1241,7 +1270,8 @@ aoecmd_ata_id(struct aoedev *d)
1241 1270
1242 skb->dev = t->ifp->nd; 1271 skb->dev = t->ifp->nd;
1243 1272
1244 d->rttavg = MAXTIMER; 1273 d->rttavg = RTTAVG_INIT;
1274 d->rttdev = RTTDEV_INIT;
1245 d->timer.function = rexmit_timer; 1275 d->timer.function = rexmit_timer;
1246 1276
1247 return skb_clone(skb, GFP_ATOMIC); 1277 return skb_clone(skb, GFP_ATOMIC);
@@ -1273,7 +1303,7 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
1273 t->d = d; 1303 t->d = d;
1274 memcpy(t->addr, addr, sizeof t->addr); 1304 memcpy(t->addr, addr, sizeof t->addr);
1275 t->ifp = t->ifs; 1305 t->ifp = t->ifs;
1276 t->maxout = t->nframes; 1306 aoecmd_wreset(t);
1277 INIT_LIST_HEAD(&t->ffree); 1307 INIT_LIST_HEAD(&t->ffree);
1278 return *tt = t; 1308 return *tt = t;
1279} 1309}
@@ -1382,7 +1412,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
1382 if (t) { 1412 if (t) {
1383 t->nframes = n; 1413 t->nframes = n;
1384 if (n < t->maxout) 1414 if (n < t->maxout)
1385 t->maxout = n; 1415 aoecmd_wreset(t);
1386 } else { 1416 } else {
1387 t = addtgt(d, h->src, n); 1417 t = addtgt(d, h->src, n);
1388 if (!t) 1418 if (!t)
@@ -1412,17 +1442,26 @@ bail:
1412} 1442}
1413 1443
1414void 1444void
1445aoecmd_wreset(struct aoetgt *t)
1446{
1447 t->maxout = 1;
1448 t->ssthresh = t->nframes / 2;
1449 t->next_cwnd = t->nframes;
1450}
1451
1452void
1415aoecmd_cleanslate(struct aoedev *d) 1453aoecmd_cleanslate(struct aoedev *d)
1416{ 1454{
1417 struct aoetgt **t, **te; 1455 struct aoetgt **t, **te;
1418 1456
1419 d->mintimer = MINTIMER; 1457 d->rttavg = RTTAVG_INIT;
1458 d->rttdev = RTTDEV_INIT;
1420 d->maxbcnt = 0; 1459 d->maxbcnt = 0;
1421 1460
1422 t = d->targets; 1461 t = d->targets;
1423 te = t + NTARGETS; 1462 te = t + NTARGETS;
1424 for (; t < te && *t; t++) 1463 for (; t < te && *t; t++)
1425 (*t)->maxout = (*t)->nframes; 1464 aoecmd_wreset(*t);
1426} 1465}
1427 1466
1428void 1467void