about | summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Ed Cashin <ecashin@coraid.com> 2012-12-17 19:04:08 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-12-17 20:15:25 -0500
commit: bbb44e30d07fdc111e34a5ec935b57521cea9499 (patch)
tree: 11dda6628c4d7719607891f287c12aa87f06855b /drivers
parent: b91316f2b7bc0b1d128a9890a2a4895f7e1c74fc (diff)
aoe: improve handling of misbehaving network paths
An AoE target can have multiple network ports used for AoE, and in the aoe driver, those are tracked by the aoetgt struct. These changes allow the aoe driver to handle network paths, or aoetgts, that are not working well, compared to the others.

Paths that do not get responses despite the retransmission of AoE commands are marked as "tainted", and non-tainted paths are preferred.

Meanwhile, the aoe driver attempts to "probe" the tainted path in the background by issuing reads of LBA 0 that are padded out to full (possibly jumbo-frame) size. If the probes get responses, then the path is "redeemed", and its taint is removed.

This mechanism has been shown to be helpful in transparently handling and recovering from real-world network "brown outs" in ways that the earlier "shoot the help-needing target in the head" mechanism could not.

Signed-off-by: Ed Cashin <ecashin@coraid.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/block/aoe/aoe.h 11
-rw-r--r-- drivers/block/aoe/aoecmd.c 377
-rw-r--r-- drivers/block/aoe/aoedev.c 1
3 files changed, 268 insertions, 121 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index bfd765cf0eb7..b6d2b16358be 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -91,6 +91,9 @@ enum {
91 RTTDSCALE = 3, 91 RTTDSCALE = 3,
92 RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE, 92 RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
93 RTTDEV_INIT = RTTAVG_INIT / 4, 93 RTTDEV_INIT = RTTAVG_INIT / 4,
94
95 HARD_SCORN_SECS = 10, /* try another remote port after this */
96 MAX_TAINT = 1000, /* cap on aoetgt taint */
94}; 97};
95 98
96struct buf { 99struct buf {
@@ -103,6 +106,10 @@ struct buf {
103 struct request *rq; 106 struct request *rq;
104}; 107};
105 108
109enum frame_flags {
110 FFL_PROBE = 1,
111};
112
106struct frame { 113struct frame {
107 struct list_head head; 114 struct list_head head;
108 u32 tag; 115 u32 tag;
@@ -118,6 +125,7 @@ struct frame {
118 struct bio_vec *bv; 125 struct bio_vec *bv;
119 ulong bcnt; 126 ulong bcnt;
120 ulong bv_off; 127 ulong bv_off;
128 char flags;
121}; 129};
122 130
123struct aoeif { 131struct aoeif {
@@ -138,8 +146,10 @@ struct aoetgt {
138 ushort next_cwnd; /* incr maxout after decrementing to zero */ 146 ushort next_cwnd; /* incr maxout after decrementing to zero */
139 ushort ssthresh; /* slow start threshold */ 147 ushort ssthresh; /* slow start threshold */
140 ulong falloc; /* number of allocated frames */ 148 ulong falloc; /* number of allocated frames */
149 int taint; /* how much we want to avoid this aoetgt */
141 int minbcnt; 150 int minbcnt;
142 int wpkts, rpkts; 151 int wpkts, rpkts;
152 char nout_probes;
143}; 153};
144 154
145struct aoedev { 155struct aoedev {
@@ -174,7 +184,6 @@ struct aoedev {
174 struct list_head rexmitq; /* deferred retransmissions */ 184 struct list_head rexmitq; /* deferred retransmissions */
175 struct aoetgt *targets[NTARGETS]; 185 struct aoetgt *targets[NTARGETS];
176 struct aoetgt **tgt; /* target in use when working */ 186 struct aoetgt **tgt; /* target in use when working */
177 struct aoetgt *htgt; /* target needing rexmit assistance */
178 ulong ntargets; 187 ulong ntargets;
179 ulong kicked; 188 ulong kicked;
180 char ident[512]; 189 char ident[512];
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 391dd8ee2009..000f7fb48841 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -22,6 +22,7 @@
22#define MAXIOC (8192) /* default meant to avoid most soft lockups */ 22#define MAXIOC (8192) /* default meant to avoid most soft lockups */
23 23
24static void ktcomplete(struct frame *, struct sk_buff *); 24static void ktcomplete(struct frame *, struct sk_buff *);
25static int count_targets(struct aoedev *d, int *untainted);
25 26
26static struct buf *nextbuf(struct aoedev *); 27static struct buf *nextbuf(struct aoedev *);
27 28
@@ -43,6 +44,8 @@ static struct {
43 spinlock_t lock; 44 spinlock_t lock;
44} iocq; 45} iocq;
45 46
47static struct page *empty_page;
48
46static struct sk_buff * 49static struct sk_buff *
47new_skb(ulong len) 50new_skb(ulong len)
48{ 51{
@@ -179,8 +182,10 @@ aoe_freetframe(struct frame *f)
179 182
180 t = f->t; 183 t = f->t;
181 f->buf = NULL; 184 f->buf = NULL;
185 f->lba = 0;
182 f->bv = NULL; 186 f->bv = NULL;
183 f->r_skb = NULL; 187 f->r_skb = NULL;
188 f->flags = 0;
184 list_add(&f->head, &t->ffree); 189 list_add(&f->head, &t->ffree);
185} 190}
186 191
@@ -234,20 +239,25 @@ newframe(struct aoedev *d)
234 struct frame *f; 239 struct frame *f;
235 struct aoetgt *t, **tt; 240 struct aoetgt *t, **tt;
236 int totout = 0; 241 int totout = 0;
242 int use_tainted;
243 int has_untainted;
237 244
238 if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */ 245 if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
239 printk(KERN_ERR "aoe: NULL TARGETS!\n"); 246 printk(KERN_ERR "aoe: NULL TARGETS!\n");
240 return NULL; 247 return NULL;
241 } 248 }
242 tt = d->tgt; /* last used target */ 249 tt = d->tgt; /* last used target */
243 for (;;) { 250 for (use_tainted = 0, has_untainted = 0;;) {
244 tt++; 251 tt++;
245 if (tt >= &d->targets[NTARGETS] || !*tt) 252 if (tt >= &d->targets[NTARGETS] || !*tt)
246 tt = d->targets; 253 tt = d->targets;
247 t = *tt; 254 t = *tt;
248 totout += t->nout; 255 if (!t->taint) {
256 has_untainted = 1;
257 totout += t->nout;
258 }
249 if (t->nout < t->maxout 259 if (t->nout < t->maxout
250 && t != d->htgt 260 && (use_tainted || !t->taint)
251 && t->ifp->nd) { 261 && t->ifp->nd) {
252 f = newtframe(d, t); 262 f = newtframe(d, t);
253 if (f) { 263 if (f) {
@@ -256,8 +266,12 @@ newframe(struct aoedev *d)
256 return f; 266 return f;
257 } 267 }
258 } 268 }
259 if (tt == d->tgt) /* we've looped and found nada */ 269 if (tt == d->tgt) { /* we've looped and found nada */
260 break; 270 if (!use_tainted && !has_untainted)
271 use_tainted = 1;
272 else
273 break;
274 }
261 } 275 }
262 if (totout == 0) { 276 if (totout == 0) {
263 d->kicked++; 277 d->kicked++;
@@ -294,21 +308,68 @@ fhash(struct frame *f)
294 list_add_tail(&f->head, &d->factive[n]); 308 list_add_tail(&f->head, &d->factive[n]);
295} 309}
296 310
311static void
312ata_rw_frameinit(struct frame *f)
313{
314 struct aoetgt *t;
315 struct aoe_hdr *h;
316 struct aoe_atahdr *ah;
317 struct sk_buff *skb;
318 char writebit, extbit;
319
320 skb = f->skb;
321 h = (struct aoe_hdr *) skb_mac_header(skb);
322 ah = (struct aoe_atahdr *) (h + 1);
323 skb_put(skb, sizeof(*h) + sizeof(*ah));
324 memset(h, 0, skb->len);
325
326 writebit = 0x10;
327 extbit = 0x4;
328
329 t = f->t;
330 f->tag = aoehdr_atainit(t->d, t, h);
331 fhash(f);
332 t->nout++;
333 f->waited = 0;
334 f->waited_total = 0;
335 if (f->buf)
336 f->lba = f->buf->sector;
337
338 /* set up ata header */
339 ah->scnt = f->bcnt >> 9;
340 put_lba(ah, f->lba);
341 if (t->d->flags & DEVFL_EXT) {
342 ah->aflags |= AOEAFL_EXT;
343 } else {
344 extbit = 0;
345 ah->lba3 &= 0x0f;
346 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
347 }
348 if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
349 skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
350 ah->aflags |= AOEAFL_WRITE;
351 skb->len += f->bcnt;
352 skb->data_len = f->bcnt;
353 skb->truesize += f->bcnt;
354 t->wpkts++;
355 } else {
356 t->rpkts++;
357 writebit = 0;
358 }
359
360 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
361 skb->dev = t->ifp->nd;
362}
363
297static int 364static int
298aoecmd_ata_rw(struct aoedev *d) 365aoecmd_ata_rw(struct aoedev *d)
299{ 366{
300 struct frame *f; 367 struct frame *f;
301 struct aoe_hdr *h;
302 struct aoe_atahdr *ah;
303 struct buf *buf; 368 struct buf *buf;
304 struct aoetgt *t; 369 struct aoetgt *t;
305 struct sk_buff *skb; 370 struct sk_buff *skb;
306 struct sk_buff_head queue; 371 struct sk_buff_head queue;
307 ulong bcnt, fbcnt; 372 ulong bcnt, fbcnt;
308 char writebit, extbit;
309
310 writebit = 0x10;
311 extbit = 0x4;
312 373
313 buf = nextbuf(d); 374 buf = nextbuf(d);
314 if (buf == NULL) 375 if (buf == NULL)
@@ -343,50 +404,15 @@ aoecmd_ata_rw(struct aoedev *d)
343 } while (fbcnt); 404 } while (fbcnt);
344 405
345 /* initialize the headers & frame */ 406 /* initialize the headers & frame */
346 skb = f->skb;
347 h = (struct aoe_hdr *) skb_mac_header(skb);
348 ah = (struct aoe_atahdr *) (h+1);
349 skb_put(skb, sizeof *h + sizeof *ah);
350 memset(h, 0, skb->len);
351 f->tag = aoehdr_atainit(d, t, h);
352 fhash(f);
353 t->nout++;
354 f->waited = 0;
355 f->waited_total = 0;
356 f->buf = buf; 407 f->buf = buf;
357 f->bcnt = bcnt; 408 f->bcnt = bcnt;
358 f->lba = buf->sector; 409 ata_rw_frameinit(f);
359
360 /* set up ata header */
361 ah->scnt = bcnt >> 9;
362 put_lba(ah, buf->sector);
363 if (d->flags & DEVFL_EXT) {
364 ah->aflags |= AOEAFL_EXT;
365 } else {
366 extbit = 0;
367 ah->lba3 &= 0x0f;
368 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
369 }
370 if (bio_data_dir(buf->bio) == WRITE) {
371 skb_fillup(skb, f->bv, f->bv_off, bcnt);
372 ah->aflags |= AOEAFL_WRITE;
373 skb->len += bcnt;
374 skb->data_len = bcnt;
375 skb->truesize += bcnt;
376 t->wpkts++;
377 } else {
378 t->rpkts++;
379 writebit = 0;
380 }
381
382 ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
383 410
384 /* mark all tracking fields and load out */ 411 /* mark all tracking fields and load out */
385 buf->nframesout += 1; 412 buf->nframesout += 1;
386 buf->sector += bcnt >> 9; 413 buf->sector += bcnt >> 9;
387 414
388 skb->dev = t->ifp->nd; 415 skb = skb_clone(f->skb, GFP_ATOMIC);
389 skb = skb_clone(skb, GFP_ATOMIC);
390 if (skb) { 416 if (skb) {
391 do_gettimeofday(&f->sent); 417 do_gettimeofday(&f->sent);
392 f->sent_jiffs = (u32) jiffies; 418 f->sent_jiffs = (u32) jiffies;
@@ -462,11 +488,14 @@ resend(struct aoedev *d, struct frame *f)
462 h = (struct aoe_hdr *) skb_mac_header(skb); 488 h = (struct aoe_hdr *) skb_mac_header(skb);
463 ah = (struct aoe_atahdr *) (h+1); 489 ah = (struct aoe_atahdr *) (h+1);
464 490
465 snprintf(buf, sizeof buf, 491 if (!(f->flags & FFL_PROBE)) {
466 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n", 492 snprintf(buf, sizeof(buf),
467 "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n, 493 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
468 h->src, h->dst, t->nout); 494 "retransmit", d->aoemajor, d->aoeminor,
469 aoechr_error(buf); 495 f->tag, jiffies, n,
496 h->src, h->dst, t->nout);
497 aoechr_error(buf);
498 }
470 499
471 f->tag = n; 500 f->tag = n;
472 fhash(f); 501 fhash(f);
@@ -558,18 +587,18 @@ ejectif(struct aoetgt *t, struct aoeif *ifp)
558} 587}
559 588
560static struct frame * 589static struct frame *
561reassign_frame(struct list_head *pos) 590reassign_frame(struct frame *f)
562{ 591{
563 struct frame *f;
564 struct frame *nf; 592 struct frame *nf;
565 struct sk_buff *skb; 593 struct sk_buff *skb;
566 594
567 f = list_entry(pos, struct frame, head);
568 nf = newframe(f->t->d); 595 nf = newframe(f->t->d);
569 if (!nf) 596 if (!nf)
570 return NULL; 597 return NULL;
571 598 if (nf->t == f->t) {
572 list_del(pos); 599 aoe_freetframe(nf);
600 return NULL;
601 }
573 602
574 skb = nf->skb; 603 skb = nf->skb;
575 nf->skb = f->skb; 604 nf->skb = f->skb;
@@ -583,52 +612,67 @@ reassign_frame(struct list_head *pos)
583 nf->sent = f->sent; 612 nf->sent = f->sent;
584 nf->sent_jiffs = f->sent_jiffs; 613 nf->sent_jiffs = f->sent_jiffs;
585 f->skb = skb; 614 f->skb = skb;
586 aoe_freetframe(f);
587 f->t->nout--;
588 nf->t->nout++;
589 615
590 return nf; 616 return nf;
591} 617}
592 618
593static int 619static void
594sthtith(struct aoedev *d) 620probe(struct aoetgt *t)
595{ 621{
596 struct frame *f, *nf; 622 struct aoedev *d;
597 struct list_head *nx, *pos, *head; 623 struct frame *f;
598 struct aoetgt *ht = d->htgt; 624 struct sk_buff *skb;
599 int i; 625 struct sk_buff_head queue;
626 size_t n, m;
627 int frag;
600 628
601 /* look through the active and pending retransmit frames */ 629 d = t->d;
602 for (i = 0; i < NFACTIVE; i++) { 630 f = newtframe(d, t);
603 head = &d->factive[i]; 631 if (!f) {
604 list_for_each_safe(pos, nx, head) { 632 pr_err("%s %pm for e%ld.%d: %s\n",
605 f = list_entry(pos, struct frame, head); 633 "aoe: cannot probe remote address",
606 if (f->t != ht) 634 t->addr,
607 continue; 635 (long) d->aoemajor, d->aoeminor,
608 nf = reassign_frame(pos); 636 "no frame available");
609 if (!nf) 637 return;
610 return 0;
611 resend(d, nf);
612 }
613 } 638 }
614 head = &d->rexmitq; 639 f->flags |= FFL_PROBE;
615 list_for_each_safe(pos, nx, head) { 640 ifrotate(t);
616 f = list_entry(pos, struct frame, head); 641 f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
617 if (f->t != ht) 642 ata_rw_frameinit(f);
618 continue; 643 skb = f->skb;
619 nf = reassign_frame(pos); 644 for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
620 if (!nf) 645 if (n < PAGE_SIZE)
621 return 0; 646 m = n;
622 resend(d, nf); 647 else
648 m = PAGE_SIZE;
649 skb_fill_page_desc(skb, frag, empty_page, 0, m);
623 } 650 }
624 /* We've cleaned up the outstanding so take away his 651 skb->len += f->bcnt;
625 * interfaces so he won't be used. We should remove him from 652 skb->data_len = f->bcnt;
626 * the target array here, but cleaning up a target is 653 skb->truesize += f->bcnt;
627 * involved. PUNT! 654
628 */ 655 skb = skb_clone(f->skb, GFP_ATOMIC);
629 memset(ht->ifs, 0, sizeof ht->ifs); 656 if (skb) {
630 d->htgt = NULL; 657 do_gettimeofday(&f->sent);
631 return 1; 658 f->sent_jiffs = (u32) jiffies;
659 __skb_queue_head_init(&queue);
660 __skb_queue_tail(&queue, skb);
661 aoenet_xmit(&queue);
662 }
663}
664
665static long
666rto(struct aoedev *d)
667{
668 long t;
669
670 t = 2 * d->rttavg >> RTTSCALE;
671 t += 8 * d->rttdev >> RTTDSCALE;
672 if (t == 0)
673 t = 1;
674
675 return t;
632} 676}
633 677
634static void 678static void
@@ -636,17 +680,53 @@ rexmit_deferred(struct aoedev *d)
636{ 680{
637 struct aoetgt *t; 681 struct aoetgt *t;
638 struct frame *f; 682 struct frame *f;
683 struct frame *nf;
639 struct list_head *pos, *nx, *head; 684 struct list_head *pos, *nx, *head;
640 int since; 685 int since;
686 int untainted;
687
688 count_targets(d, &untainted);
641 689
642 head = &d->rexmitq; 690 head = &d->rexmitq;
643 list_for_each_safe(pos, nx, head) { 691 list_for_each_safe(pos, nx, head) {
644 f = list_entry(pos, struct frame, head); 692 f = list_entry(pos, struct frame, head);
645 t = f->t; 693 t = f->t;
694 if (t->taint) {
695 if (!(f->flags & FFL_PROBE)) {
696 nf = reassign_frame(f);
697 if (nf) {
698 if (t->nout_probes == 0
699 && untainted > 0) {
700 probe(t);
701 t->nout_probes++;
702 }
703 list_replace(&f->head, &nf->head);
704 pos = &nf->head;
705 aoe_freetframe(f);
706 f = nf;
707 t = f->t;
708 }
709 } else if (untainted < 1) {
710 /* don't probe w/o other untainted aoetgts */
711 goto stop_probe;
712 } else if (tsince_hr(f) < t->taint * rto(d)) {
713 /* reprobe slowly when taint is high */
714 continue;
715 }
716 } else if (f->flags & FFL_PROBE) {
717stop_probe: /* don't probe untainted aoetgts */
718 list_del(pos);
719 aoe_freetframe(f);
720 /* leaving d->kicked, because this is routine */
721 f->t->d->flags |= DEVFL_KICKME;
722 continue;
723 }
646 if (t->nout >= t->maxout) 724 if (t->nout >= t->maxout)
647 continue; 725 continue;
648 list_del(pos); 726 list_del(pos);
649 t->nout++; 727 t->nout++;
728 if (f->flags & FFL_PROBE)
729 t->nout_probes++;
650 since = tsince_hr(f); 730 since = tsince_hr(f);
651 f->waited += since; 731 f->waited += since;
652 f->waited_total += since; 732 f->waited_total += since;
@@ -654,6 +734,36 @@ rexmit_deferred(struct aoedev *d)
654 } 734 }
655} 735}
656 736
737/* An aoetgt accumulates demerits quickly, and successful
738 * probing redeems the aoetgt slowly.
739 */
740static void
741scorn(struct aoetgt *t)
742{
743 int n;
744
745 n = t->taint++;
746 t->taint += t->taint * 2;
747 if (n > t->taint)
748 t->taint = n;
749 if (t->taint > MAX_TAINT)
750 t->taint = MAX_TAINT;
751}
752
753static int
754count_targets(struct aoedev *d, int *untainted)
755{
756 int i, good;
757
758 for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
759 if (d->targets[i]->taint == 0)
760 good++;
761
762 if (untainted)
763 *untainted = good;
764 return i;
765}
766
657static void 767static void
658rexmit_timer(ulong vp) 768rexmit_timer(ulong vp)
659{ 769{
@@ -666,6 +776,7 @@ rexmit_timer(ulong vp)
666 register long timeout; 776 register long timeout;
667 ulong flags, n; 777 ulong flags, n;
668 int i; 778 int i;
779 int utgts; /* number of aoetgt descriptors (not slots) */
669 int since; 780 int since;
670 781
671 d = (struct aoedev *) vp; 782 d = (struct aoedev *) vp;
@@ -673,10 +784,9 @@ rexmit_timer(ulong vp)
673 spin_lock_irqsave(&d->lock, flags); 784 spin_lock_irqsave(&d->lock, flags);
674 785
675 /* timeout based on observed timings and variations */ 786 /* timeout based on observed timings and variations */
676 timeout = 2 * d->rttavg >> RTTSCALE; 787 timeout = rto(d);
677 timeout += 8 * d->rttdev >> RTTDSCALE; 788
678 if (timeout == 0) 789 utgts = count_targets(d, NULL);
679 timeout = 1;
680 790
681 if (d->flags & DEVFL_TKILL) { 791 if (d->flags & DEVFL_TKILL) {
682 spin_unlock_irqrestore(&d->lock, flags); 792 spin_unlock_irqrestore(&d->lock, flags);
@@ -702,7 +812,7 @@ rexmit_timer(ulong vp)
702 since = tsince_hr(f); 812 since = tsince_hr(f);
703 n = f->waited_total + since; 813 n = f->waited_total + since;
704 n /= USEC_PER_SEC; 814 n /= USEC_PER_SEC;
705 if (n > aoe_deadsecs) { 815 if (n > aoe_deadsecs && !(f->flags & FFL_PROBE)) {
706 /* Waited too long. Device failure. 816 /* Waited too long. Device failure.
707 * Hang all frames on first hash bucket for downdev 817 * Hang all frames on first hash bucket for downdev
708 * to clean up. 818 * to clean up.
@@ -713,19 +823,26 @@ rexmit_timer(ulong vp)
713 } 823 }
714 824
715 t = f->t; 825 t = f->t;
716 if (n > aoe_deadsecs/2) 826 n = f->waited + since;
717 d->htgt = t; /* see if another target can help */ 827 n /= USEC_PER_SEC;
828 if (aoe_deadsecs && utgts > 0
829 && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
830 scorn(t); /* avoid this target */
718 831
719 if (t->maxout != 1) { 832 if (t->maxout != 1) {
720 t->ssthresh = t->maxout / 2; 833 t->ssthresh = t->maxout / 2;
721 t->maxout = 1; 834 t->maxout = 1;
722 } 835 }
723 836
724 ifp = getif(t, f->skb->dev); 837 if (f->flags & FFL_PROBE) {
725 if (ifp && ++ifp->lost > (t->nframes << 1) 838 t->nout_probes--;
726 && (ifp != t->ifs || t->ifs[1].nd)) { 839 } else {
727 ejectif(t, ifp); 840 ifp = getif(t, f->skb->dev);
728 ifp = NULL; 841 if (ifp && ++ifp->lost > (t->nframes << 1)
842 && (ifp != t->ifs || t->ifs[1].nd)) {
843 ejectif(t, ifp);
844 ifp = NULL;
845 }
729 } 846 }
730 list_move_tail(pos, &d->rexmitq); 847 list_move_tail(pos, &d->rexmitq);
731 t->nout--; 848 t->nout--;
@@ -733,7 +850,7 @@ rexmit_timer(ulong vp)
733 rexmit_deferred(d); 850 rexmit_deferred(d);
734 851
735out: 852out:
736 if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) { 853 if ((d->flags & DEVFL_KICKME) && d->blkq) {
737 d->flags &= ~DEVFL_KICKME; 854 d->flags &= ~DEVFL_KICKME;
738 d->blkq->request_fn(d->blkq); 855 d->blkq->request_fn(d->blkq);
739 } 856 }
@@ -854,8 +971,6 @@ nextbuf(struct aoedev *d)
854void 971void
855aoecmd_work(struct aoedev *d) 972aoecmd_work(struct aoedev *d)
856{ 973{
857 if (d->htgt && !sthtith(d))
858 return;
859 rexmit_deferred(d); 974 rexmit_deferred(d);
860 while (aoecmd_ata_rw(d)) 975 while (aoecmd_ata_rw(d))
861 ; 976 ;
@@ -1065,19 +1180,22 @@ ktiocomplete(struct frame *f)
1065 struct aoeif *ifp; 1180 struct aoeif *ifp;
1066 struct aoedev *d; 1181 struct aoedev *d;
1067 long n; 1182 long n;
1183 int untainted;
1068 1184
1069 if (f == NULL) 1185 if (f == NULL)
1070 return; 1186 return;
1071 1187
1072 t = f->t; 1188 t = f->t;
1073 d = t->d; 1189 d = t->d;
1190 skb = f->r_skb;
1191 buf = f->buf;
1192 if (f->flags & FFL_PROBE)
1193 goto out;
1194 if (!skb) /* just fail the buf. */
1195 goto noskb;
1074 1196
1075 hout = (struct aoe_hdr *) skb_mac_header(f->skb); 1197 hout = (struct aoe_hdr *) skb_mac_header(f->skb);
1076 ahout = (struct aoe_atahdr *) (hout+1); 1198 ahout = (struct aoe_atahdr *) (hout+1);
1077 buf = f->buf;
1078 skb = f->r_skb;
1079 if (skb == NULL)
1080 goto noskb; /* just fail the buf. */
1081 1199
1082 hin = (struct aoe_hdr *) skb->data; 1200 hin = (struct aoe_hdr *) skb->data;
1083 skb_pull(skb, sizeof(*hin)); 1201 skb_pull(skb, sizeof(*hin));
@@ -1089,7 +1207,7 @@ ktiocomplete(struct frame *f)
1089 d->aoemajor, d->aoeminor); 1207 d->aoemajor, d->aoeminor);
1090noskb: if (buf) 1208noskb: if (buf)
1091 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); 1209 clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
1092 goto badrsp; 1210 goto out;
1093 } 1211 }
1094 1212
1095 n = ahout->scnt << 9; 1213 n = ahout->scnt << 9;
@@ -1109,8 +1227,6 @@ noskb: if (buf)
1109 ifp = getif(t, skb->dev); 1227 ifp = getif(t, skb->dev);
1110 if (ifp) 1228 if (ifp)
1111 ifp->lost = 0; 1229 ifp->lost = 0;
1112 if (d->htgt == t) /* I'll help myself, thank you. */
1113 d->htgt = NULL;
1114 spin_unlock_irq(&d->lock); 1230 spin_unlock_irq(&d->lock);
1115 break; 1231 break;
1116 case ATA_CMD_ID_ATA: 1232 case ATA_CMD_ID_ATA:
@@ -1131,8 +1247,17 @@ noskb: if (buf)
1131 be16_to_cpu(get_unaligned(&hin->major)), 1247 be16_to_cpu(get_unaligned(&hin->major)),
1132 hin->minor); 1248 hin->minor);
1133 } 1249 }
1134badrsp: 1250out:
1135 spin_lock_irq(&d->lock); 1251 spin_lock_irq(&d->lock);
1252 if (t->taint > 0
1253 && --t->taint > 0
1254 && t->nout_probes == 0) {
1255 count_targets(d, &untainted);
1256 if (untainted > 0) {
1257 probe(t);
1258 t->nout_probes++;
1259 }
1260 }
1136 1261
1137 aoe_freetframe(f); 1262 aoe_freetframe(f);
1138 1263
@@ -1261,6 +1386,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
1261 if (f) { 1386 if (f) {
1262 calc_rttavg(d, f->t, tsince_hr(f)); 1387 calc_rttavg(d, f->t, tsince_hr(f));
1263 f->t->nout--; 1388 f->t->nout--;
1389 if (f->flags & FFL_PROBE)
1390 f->t->nout_probes--;
1264 } else { 1391 } else {
1265 f = getframe_deferred(d, n); 1392 f = getframe_deferred(d, n);
1266 if (f) { 1393 if (f) {
@@ -1379,6 +1506,7 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
1379 memcpy(t->addr, addr, sizeof t->addr); 1506 memcpy(t->addr, addr, sizeof t->addr);
1380 t->ifp = t->ifs; 1507 t->ifp = t->ifs;
1381 aoecmd_wreset(t); 1508 aoecmd_wreset(t);
1509 t->maxout = t->nframes / 2;
1382 INIT_LIST_HEAD(&t->ffree); 1510 INIT_LIST_HEAD(&t->ffree);
1383 return *tt = t; 1511 return *tt = t;
1384} 1512}
@@ -1584,6 +1712,14 @@ aoe_flush_iocq(void)
1584int __init 1712int __init
1585aoecmd_init(void) 1713aoecmd_init(void)
1586{ 1714{
1715 void *p;
1716
1717 /* get_zeroed_page returns page with ref count 1 */
1718 p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
1719 if (!p)
1720 return -ENOMEM;
1721 empty_page = virt_to_page(p);
1722
1587 INIT_LIST_HEAD(&iocq.head); 1723 INIT_LIST_HEAD(&iocq.head);
1588 spin_lock_init(&iocq.lock); 1724 spin_lock_init(&iocq.lock);
1589 init_waitqueue_head(&ktiowq); 1725 init_waitqueue_head(&ktiowq);
@@ -1599,4 +1735,7 @@ aoecmd_exit(void)
1599{ 1735{
1600 aoe_ktstop(&kts); 1736 aoe_ktstop(&kts);
1601 aoe_flush_iocq(); 1737 aoe_flush_iocq();
1738
1739 free_page((unsigned long) page_address(empty_page));
1740 empty_page = NULL;
1602} 1741}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index aaaea662a72a..f0c0c7416aed 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -223,7 +223,6 @@ aoedev_downdev(struct aoedev *d)
223 223
224 /* clean out the in-process request (if any) */ 224 /* clean out the in-process request (if any) */
225 aoe_failip(d); 225 aoe_failip(d);
226 d->htgt = NULL;
227 226
228 /* fast fail all pending I/O */ 227 /* fast fail all pending I/O */
229 if (d->blkq) { 228 if (d->blkq) {