diff options
author | Ed Cashin <ecashin@coraid.com> | 2012-12-17 19:03:49 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-17 20:15:24 -0500 |
commit | 5f0c9c48e7265039c3f945aaf44a1c6ae8adbd01 (patch) | |
tree | 2eb73358dc9ad58dce32dd40ceb0fbc938d915d4 /drivers/block/aoe | |
parent | 0d555ecfa468f6dc29697829844f2f79909e376f (diff) |
aoe: use high-resolution RTTs with fallback to low-res
These changes improve the accuracy of the decision about whether it's time
to retransmit an AoE command by using the microsecond-resolution
gettimeofday instead of jiffies.
Because the system time can jump suddenly, the decision reverts to using
jiffies if the high-resolution time difference is relatively large.
Otherwise the AoE targets could be considered failed inappropriately.
Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/aoe')
-rw-r--r-- | drivers/block/aoe/aoe.h | 9 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 57 |
2 files changed, 55 insertions, 11 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index 9e884acd75fc..9fb68fc3b280 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h | |||
@@ -88,8 +88,7 @@ enum { | |||
88 | TIMERTICK = HZ / 10, | 88 | TIMERTICK = HZ / 10, |
89 | RTTSCALE = 8, | 89 | RTTSCALE = 8, |
90 | RTTDSCALE = 3, | 90 | RTTDSCALE = 3, |
91 | MAXTIMER = HZ << 1, | 91 | RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE, |
92 | RTTAVG_INIT = HZ / 4 << RTTSCALE, | ||
93 | RTTDEV_INIT = RTTAVG_INIT / 4, | 92 | RTTDEV_INIT = RTTAVG_INIT / 4, |
94 | }; | 93 | }; |
95 | 94 | ||
@@ -106,6 +105,8 @@ struct buf { | |||
106 | struct frame { | 105 | struct frame { |
107 | struct list_head head; | 106 | struct list_head head; |
108 | u32 tag; | 107 | u32 tag; |
108 | struct timeval sent; /* high-res time packet was sent */ | ||
109 | u32 sent_jiffs; /* low-res jiffies-based sent time */ | ||
109 | ulong waited; | 110 | ulong waited; |
110 | struct aoetgt *t; /* parent target I belong to */ | 111 | struct aoetgt *t; /* parent target I belong to */ |
111 | sector_t lba; | 112 | sector_t lba; |
@@ -143,11 +144,11 @@ struct aoedev { | |||
143 | struct aoedev *next; | 144 | struct aoedev *next; |
144 | ulong sysminor; | 145 | ulong sysminor; |
145 | ulong aoemajor; | 146 | ulong aoemajor; |
147 | u32 rttavg; /* scaled AoE round trip time average */ | ||
148 | u32 rttdev; /* scaled round trip time mean deviation */ | ||
146 | u16 aoeminor; | 149 | u16 aoeminor; |
147 | u16 flags; | 150 | u16 flags; |
148 | u16 nopen; /* (bd_openers isn't available without sleeping) */ | 151 | u16 nopen; /* (bd_openers isn't available without sleeping) */ |
149 | u16 rttavg; /* scaled AoE round trip time average */ | ||
150 | u16 rttdev; /* scaled round trip time mean deviation */ | ||
151 | u16 fw_ver; /* version of blade's firmware */ | 152 | u16 fw_ver; /* version of blade's firmware */ |
152 | u16 lasttag; /* last tag sent */ | 153 | u16 lasttag; /* last tag sent */ |
153 | u16 useme; | 154 | u16 useme; |
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 9aefbe3957ca..a99220ad6262 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c | |||
@@ -387,6 +387,8 @@ aoecmd_ata_rw(struct aoedev *d) | |||
387 | skb->dev = t->ifp->nd; | 387 | skb->dev = t->ifp->nd; |
388 | skb = skb_clone(skb, GFP_ATOMIC); | 388 | skb = skb_clone(skb, GFP_ATOMIC); |
389 | if (skb) { | 389 | if (skb) { |
390 | do_gettimeofday(&f->sent); | ||
391 | f->sent_jiffs = (u32) jiffies; | ||
390 | __skb_queue_head_init(&queue); | 392 | __skb_queue_head_init(&queue); |
391 | __skb_queue_tail(&queue, skb); | 393 | __skb_queue_tail(&queue, skb); |
392 | aoenet_xmit(&queue); | 394 | aoenet_xmit(&queue); |
@@ -475,12 +477,46 @@ resend(struct aoedev *d, struct frame *f) | |||
475 | skb = skb_clone(skb, GFP_ATOMIC); | 477 | skb = skb_clone(skb, GFP_ATOMIC); |
476 | if (skb == NULL) | 478 | if (skb == NULL) |
477 | return; | 479 | return; |
480 | do_gettimeofday(&f->sent); | ||
481 | f->sent_jiffs = (u32) jiffies; | ||
478 | __skb_queue_head_init(&queue); | 482 | __skb_queue_head_init(&queue); |
479 | __skb_queue_tail(&queue, skb); | 483 | __skb_queue_tail(&queue, skb); |
480 | aoenet_xmit(&queue); | 484 | aoenet_xmit(&queue); |
481 | } | 485 | } |
482 | 486 | ||
483 | static int | 487 | static int |
488 | tsince_hr(struct frame *f) | ||
489 | { | ||
490 | struct timeval now; | ||
491 | int n; | ||
492 | |||
493 | do_gettimeofday(&now); | ||
494 | n = now.tv_usec - f->sent.tv_usec; | ||
495 | n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC; | ||
496 | |||
497 | if (n < 0) | ||
498 | n = -n; | ||
499 | |||
500 | /* For relatively long periods, use jiffies to avoid | ||
501 | * discrepancies caused by updates to the system time. | ||
502 | * | ||
503 | * On system with HZ of 1000, 32-bits is over 49 days | ||
504 | * worth of jiffies, or over 71 minutes worth of usecs. | ||
505 | * | ||
506 | * Jiffies overflow is handled by subtraction of unsigned ints: | ||
507 | * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe | ||
508 | * $3 = 4 | ||
509 | * (gdb) | ||
510 | */ | ||
511 | if (n > USEC_PER_SEC / 4) { | ||
512 | n = ((u32) jiffies) - f->sent_jiffs; | ||
513 | n *= USEC_PER_SEC / HZ; | ||
514 | } | ||
515 | |||
516 | return n; | ||
517 | } | ||
518 | |||
519 | static int | ||
484 | tsince(u32 tag) | 520 | tsince(u32 tag) |
485 | { | 521 | { |
486 | int n; | 522 | int n; |
@@ -489,7 +525,7 @@ tsince(u32 tag) | |||
489 | n -= tag & 0xffff; | 525 | n -= tag & 0xffff; |
490 | if (n < 0) | 526 | if (n < 0) |
491 | n += 1<<16; | 527 | n += 1<<16; |
492 | return n; | 528 | return jiffies_to_usecs(n + 1); |
493 | } | 529 | } |
494 | 530 | ||
495 | static struct aoeif * | 531 | static struct aoeif * |
@@ -552,6 +588,7 @@ sthtith(struct aoedev *d) | |||
552 | nf->bv = f->bv; | 588 | nf->bv = f->bv; |
553 | nf->bv_off = f->bv_off; | 589 | nf->bv_off = f->bv_off; |
554 | nf->waited = 0; | 590 | nf->waited = 0; |
591 | nf->sent_jiffs = f->sent_jiffs; | ||
555 | f->skb = skb; | 592 | f->skb = skb; |
556 | aoe_freetframe(f); | 593 | aoe_freetframe(f); |
557 | ht->nout--; | 594 | ht->nout--; |
@@ -621,7 +658,7 @@ rexmit_timer(ulong vp) | |||
621 | head = &d->factive[i]; | 658 | head = &d->factive[i]; |
622 | list_for_each_safe(pos, nx, head) { | 659 | list_for_each_safe(pos, nx, head) { |
623 | f = list_entry(pos, struct frame, head); | 660 | f = list_entry(pos, struct frame, head); |
624 | if (tsince(f->tag) < timeout) | 661 | if (tsince_hr(f) < timeout) |
625 | break; /* end of expired frames */ | 662 | break; /* end of expired frames */ |
626 | /* move to flist for later processing */ | 663 | /* move to flist for later processing */ |
627 | list_move_tail(pos, &flist); | 664 | list_move_tail(pos, &flist); |
@@ -632,8 +669,8 @@ rexmit_timer(ulong vp) | |||
632 | while (!list_empty(&flist)) { | 669 | while (!list_empty(&flist)) { |
633 | pos = flist.next; | 670 | pos = flist.next; |
634 | f = list_entry(pos, struct frame, head); | 671 | f = list_entry(pos, struct frame, head); |
635 | n = f->waited += tsince(f->tag); | 672 | n = f->waited += tsince_hr(f); |
636 | n /= HZ; | 673 | n /= USEC_PER_SEC; |
637 | if (n > aoe_deadsecs) { | 674 | if (n > aoe_deadsecs) { |
638 | /* Waited too long. Device failure. | 675 | /* Waited too long. Device failure. |
639 | * Hang all frames on first hash bucket for downdev | 676 | * Hang all frames on first hash bucket for downdev |
@@ -1193,12 +1230,12 @@ aoecmd_ata_rsp(struct sk_buff *skb) | |||
1193 | n = be32_to_cpu(get_unaligned(&h->tag)); | 1230 | n = be32_to_cpu(get_unaligned(&h->tag)); |
1194 | f = getframe(d, n); | 1231 | f = getframe(d, n); |
1195 | if (f) { | 1232 | if (f) { |
1196 | calc_rttavg(d, f->t, tsince(n)); | 1233 | calc_rttavg(d, f->t, tsince_hr(f)); |
1197 | f->t->nout--; | 1234 | f->t->nout--; |
1198 | } else { | 1235 | } else { |
1199 | f = getframe_deferred(d, n); | 1236 | f = getframe_deferred(d, n); |
1200 | if (f) { | 1237 | if (f) { |
1201 | calc_rttavg(d, NULL, tsince(n)); | 1238 | calc_rttavg(d, NULL, tsince_hr(f)); |
1202 | } else { | 1239 | } else { |
1203 | calc_rttavg(d, NULL, tsince(n)); | 1240 | calc_rttavg(d, NULL, tsince(n)); |
1204 | spin_unlock_irqrestore(&d->lock, flags); | 1241 | spin_unlock_irqrestore(&d->lock, flags); |
@@ -1276,7 +1313,13 @@ aoecmd_ata_id(struct aoedev *d) | |||
1276 | d->rttdev = RTTDEV_INIT; | 1313 | d->rttdev = RTTDEV_INIT; |
1277 | d->timer.function = rexmit_timer; | 1314 | d->timer.function = rexmit_timer; |
1278 | 1315 | ||
1279 | return skb_clone(skb, GFP_ATOMIC); | 1316 | skb = skb_clone(skb, GFP_ATOMIC); |
1317 | if (skb) { | ||
1318 | do_gettimeofday(&f->sent); | ||
1319 | f->sent_jiffs = (u32) jiffies; | ||
1320 | } | ||
1321 | |||
1322 | return skb; | ||
1280 | } | 1323 | } |
1281 | 1324 | ||
1282 | static struct aoetgt * | 1325 | static struct aoetgt * |