author     Ed L. Cashin <ecashin@coraid.com>  2008-02-08 07:20:00 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-08 12:22:31 -0500
commit     68e0d42f39d85b334d3867a4e5fc2e0e775c1a6c (patch)
tree       f4cdb47157de4dace5b99e69fc39ea017495922e
parent     8911ef4dc97f77797f297318010a7424300d2d50 (diff)
aoe: handle multiple network paths to AoE device
A remote AoE device is something that can process ATA commands and is identified by an AoE shelf number and an AoE slot number. Such a device might have more than one network interface, and it might be reachable by more than one local network interface. This patch tracks the network paths available to each AoE device, allowing them to be used more efficiently.

Andrew Morton asked about the call to msleep_interruptible in the revalidate function. Yes, if a signal is pending, then msleep_interruptible will not return 0. That means we will not loop but will call aoenet_xmit with a NULL skb, which is a no-op. If the system is too low on memory or the aoe driver is too low on frames, then the user can hit control-C to interrupt the attempt to do a revalidate. I have added a comment to the code summarizing that.

Andrew Morton asked whether the allocation performed inside addtgt could use a more relaxed allocation like GFP_KERNEL, but addtgt is called when the aoedev lock has been locked with spin_lock_irqsave. It would be nice to allocate the memory under fewer restrictions, but targets are only added when the device is being discovered, and if the target can't be added right now, we can try again in a minute when the next AoE config query broadcast goes out.

Andrew Morton pointed out that the "too many targets" message could be printed for failing GFP_ATOMIC allocations. The last patch in this series makes the messages more specific.

Signed-off-by: Ed L. Cashin <ecashin@coraid.com>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
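For reference, the msleep_interruptible behaviour described above is easier to see if the new revalidate loop (see the aoechr.c hunk below) is read without the goto. The following is only a paraphrase of the patch's control flow, not code lifted from the patch:

    /* Paraphrase of the revalidate retry in aoechr.c below.
     * msleep_interruptible(200) returns 0 only when the full delay
     * elapsed with no signal pending, so a control-C ends the retries;
     * aoenet_xmit(NULL) is then a harmless no-op.
     */
    spin_lock_irqsave(&d->lock, flags);
    aoecmd_cleanslate(d);
    for (;;) {
            skb = aoecmd_ata_id(d);         /* NULL if no frame is free */
            spin_unlock_irqrestore(&d->lock, flags);
            if (skb || msleep_interruptible(200))
                    break;                  /* got a frame, or a signal is pending */
            spin_lock_irqsave(&d->lock, flags);
    }
    aoenet_xmit(skb);                       /* no-op when skb == NULL */
    aoecmd_cfg(major, minor);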
-rw-r--r--  drivers/block/aoe/aoe.h      57
-rw-r--r--  drivers/block/aoe/aoeblk.c   62
-rw-r--r--  drivers/block/aoe/aoechr.c   17
-rw-r--r--  drivers/block/aoe/aoecmd.c  675
-rw-r--r--  drivers/block/aoe/aoedev.c  168
-rw-r--r--  drivers/block/aoe/aoenet.c    9
6 files changed, 653 insertions, 335 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 4d0543a145df..87df18bf4dea 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -76,10 +76,8 @@ enum {
76 DEVFL_EXT = (1<<2), /* device accepts lba48 commands */ 76 DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
77 DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */ 77 DEVFL_CLOSEWAIT = (1<<3), /* device is waiting for all closes to revalidate */
78 DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */ 78 DEVFL_GDALLOC = (1<<4), /* need to alloc gendisk */
79 DEVFL_PAUSE = (1<<5), 79 DEVFL_KICKME = (1<<5), /* slow polling network card catch */
80 DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */ 80 DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
81 DEVFL_MAXBCNT = (1<<7), /* d->maxbcnt is not changeable */
82 DEVFL_KICKME = (1<<8),
83 81
84 BUFFL_FAIL = 1, 82 BUFFL_FAIL = 1,
85}; 83};
@@ -88,17 +86,24 @@ enum {
88 DEFAULTBCNT = 2 * 512, /* 2 sectors */ 86 DEFAULTBCNT = 2 * 512, /* 2 sectors */
89 NPERSHELF = 16, /* number of slots per shelf address */ 87 NPERSHELF = 16, /* number of slots per shelf address */
90 FREETAG = -1, 88 FREETAG = -1,
91 MIN_BUFS = 8, 89 MIN_BUFS = 16,
90 NTARGETS = 8,
91 NAOEIFS = 8,
92
93 TIMERTICK = HZ / 10,
94 MINTIMER = HZ >> 2,
95 MAXTIMER = HZ << 1,
96 HELPWAIT = 20,
92}; 97};
93 98
94struct buf { 99struct buf {
95 struct list_head bufs; 100 struct list_head bufs;
96 ulong start_time; /* for disk stats */ 101 ulong stime; /* for disk stats */
97 ulong flags; 102 ulong flags;
98 ulong nframesout; 103 ulong nframesout;
99 char *bufaddr;
100 ulong resid; 104 ulong resid;
101 ulong bv_resid; 105 ulong bv_resid;
106 ulong bv_off;
102 sector_t sector; 107 sector_t sector;
103 struct bio *bio; 108 struct bio *bio;
104 struct bio_vec *bv; 109 struct bio_vec *bv;
@@ -114,19 +119,37 @@ struct frame {
114 struct sk_buff *skb; 119 struct sk_buff *skb;
115}; 120};
116 121
122struct aoeif {
123 struct net_device *nd;
124 unsigned char lost;
125 unsigned char lostjumbo;
126 ushort maxbcnt;
127};
128
129struct aoetgt {
130 unsigned char addr[6];
131 ushort nframes;
132 struct frame *frames;
133 struct aoeif ifs[NAOEIFS];
134 struct aoeif *ifp; /* current aoeif in use */
135 ushort nout;
136 ushort maxout;
137 u16 lasttag; /* last tag sent */
138 u16 useme;
139 ulong lastwadj; /* last window adjustment */
140 int wpkts, rpkts;
141};
142
117struct aoedev { 143struct aoedev {
118 struct aoedev *next; 144 struct aoedev *next;
119 unsigned char addr[6]; /* remote mac addr */
120 ushort flags;
121 ulong sysminor; 145 ulong sysminor;
122 ulong aoemajor; 146 ulong aoemajor;
123 ulong aoeminor; 147 u16 aoeminor;
148 u16 flags;
124 u16 nopen; /* (bd_openers isn't available without sleeping) */ 149 u16 nopen; /* (bd_openers isn't available without sleeping) */
125 u16 lasttag; /* last tag sent */
126 u16 rttavg; /* round trip average of requests/responses */ 150 u16 rttavg; /* round trip average of requests/responses */
127 u16 mintimer; 151 u16 mintimer;
128 u16 fw_ver; /* version of blade's firmware */ 152 u16 fw_ver; /* version of blade's firmware */
129 u16 maxbcnt;
130 struct work_struct work;/* disk create work struct */ 153 struct work_struct work;/* disk create work struct */
131 struct gendisk *gd; 154 struct gendisk *gd;
132 struct request_queue blkq; 155 struct request_queue blkq;
@@ -134,15 +157,14 @@ struct aoedev {
134 sector_t ssize; 157 sector_t ssize;
135 struct timer_list timer; 158 struct timer_list timer;
136 spinlock_t lock; 159 spinlock_t lock;
137 struct net_device *ifp; /* interface ed is attached to */
138 struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ 160 struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
139 struct sk_buff *sendq_tl; 161 struct sk_buff *sendq_tl;
140 mempool_t *bufpool; /* for deadlock-free Buf allocation */ 162 mempool_t *bufpool; /* for deadlock-free Buf allocation */
141 struct list_head bufq; /* queue of bios to work on */ 163 struct list_head bufq; /* queue of bios to work on */
142 struct buf *inprocess; /* the one we're currently working on */ 164 struct buf *inprocess; /* the one we're currently working on */
143 ushort lostjumbo; 165 struct aoetgt *targets[NTARGETS];
144 ushort nframes; /* number of frames below */ 166 struct aoetgt **tgt; /* target in use when working */
145 struct frame *frames; 167 struct aoetgt **htgt; /* target needing rexmit assistance */
146}; 168};
147 169
148 170
@@ -160,12 +182,13 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
160void aoecmd_ata_rsp(struct sk_buff *); 182void aoecmd_ata_rsp(struct sk_buff *);
161void aoecmd_cfg_rsp(struct sk_buff *); 183void aoecmd_cfg_rsp(struct sk_buff *);
162void aoecmd_sleepwork(struct work_struct *); 184void aoecmd_sleepwork(struct work_struct *);
163struct sk_buff *new_skb(ulong); 185void aoecmd_cleanslate(struct aoedev *);
186struct sk_buff *aoecmd_ata_id(struct aoedev *);
164 187
165int aoedev_init(void); 188int aoedev_init(void);
166void aoedev_exit(void); 189void aoedev_exit(void);
167struct aoedev *aoedev_by_aoeaddr(int maj, int min); 190struct aoedev *aoedev_by_aoeaddr(int maj, int min);
168struct aoedev *aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt); 191struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
169void aoedev_downdev(struct aoedev *d); 192void aoedev_downdev(struct aoedev *d);
170int aoedev_isbusy(struct aoedev *d); 193int aoedev_isbusy(struct aoedev *d);
171 194
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 826d12381e21..c2649c954278 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -24,7 +24,7 @@ static ssize_t aoedisk_show_state(struct device *dev,
24 return snprintf(page, PAGE_SIZE, 24 return snprintf(page, PAGE_SIZE,
25 "%s%s\n", 25 "%s%s\n",
26 (d->flags & DEVFL_UP) ? "up" : "down", 26 (d->flags & DEVFL_UP) ? "up" : "down",
27 (d->flags & DEVFL_PAUSE) ? ",paused" : 27 (d->flags & DEVFL_KICKME) ? ",kickme" :
28 (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : ""); 28 (d->nopen && !(d->flags & DEVFL_UP)) ? ",closewait" : "");
29 /* I'd rather see nopen exported so we can ditch closewait */ 29 /* I'd rather see nopen exported so we can ditch closewait */
30} 30}
@@ -33,17 +33,49 @@ static ssize_t aoedisk_show_mac(struct device *dev,
33{ 33{
34 struct gendisk *disk = dev_to_disk(dev); 34 struct gendisk *disk = dev_to_disk(dev);
35 struct aoedev *d = disk->private_data; 35 struct aoedev *d = disk->private_data;
36 struct aoetgt *t = d->targets[0];
36 37
38 if (t == NULL)
39 return snprintf(page, PAGE_SIZE, "none\n");
37 return snprintf(page, PAGE_SIZE, "%012llx\n", 40 return snprintf(page, PAGE_SIZE, "%012llx\n",
38 (unsigned long long)mac_addr(d->addr)); 41 (unsigned long long)mac_addr(t->addr));
39} 42}
40static ssize_t aoedisk_show_netif(struct device *dev, 43static ssize_t aoedisk_show_netif(struct device *dev,
41 struct device_attribute *attr, char *page) 44 struct device_attribute *attr, char *page)
42{ 45{
43 struct gendisk *disk = dev_to_disk(dev); 46 struct gendisk *disk = dev_to_disk(dev);
44 struct aoedev *d = disk->private_data; 47 struct aoedev *d = disk->private_data;
48 struct net_device *nds[8], **nd, **nnd, **ne;
49 struct aoetgt **t, **te;
50 struct aoeif *ifp, *e;
51 char *p;
52
53 memset(nds, 0, sizeof nds);
54 nd = nds;
55 ne = nd + ARRAY_SIZE(nds);
56 t = d->targets;
57 te = t + NTARGETS;
58 for (; t < te && *t; t++) {
59 ifp = (*t)->ifs;
60 e = ifp + NAOEIFS;
61 for (; ifp < e && ifp->nd; ifp++) {
62 for (nnd = nds; nnd < nd; nnd++)
63 if (*nnd == ifp->nd)
64 break;
65 if (nnd == nd && nd != ne)
66 *nd++ = ifp->nd;
67 }
68 }
45 69
46 return snprintf(page, PAGE_SIZE, "%s\n", d->ifp->name); 70 ne = nd;
71 nd = nds;
72 if (*nd == NULL)
73 return snprintf(page, PAGE_SIZE, "none\n");
74 for (p = page; nd < ne; nd++)
75 p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
76 p == page ? "" : ",", (*nd)->name);
77 p += snprintf(p, PAGE_SIZE - (p-page), "\n");
78 return p-page;
47} 79}
48/* firmware version */ 80/* firmware version */
49static ssize_t aoedisk_show_fwver(struct device *dev, 81static ssize_t aoedisk_show_fwver(struct device *dev,
@@ -134,7 +166,23 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
134 166
135 blk_queue_bounce(q, &bio); 167 blk_queue_bounce(q, &bio);
136 168
169 if (bio == NULL) {
170 printk(KERN_ERR "aoe: bio is NULL\n");
171 BUG();
172 return 0;
173 }
137 d = bio->bi_bdev->bd_disk->private_data; 174 d = bio->bi_bdev->bd_disk->private_data;
175 if (d == NULL) {
176 printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
177 BUG();
178 bio_endio(bio, -ENXIO);
179 return 0;
180 } else if (bio->bi_io_vec == NULL) {
181 printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
182 BUG();
183 bio_endio(bio, -ENXIO);
184 return 0;
185 }
138 buf = mempool_alloc(d->bufpool, GFP_NOIO); 186 buf = mempool_alloc(d->bufpool, GFP_NOIO);
139 if (buf == NULL) { 187 if (buf == NULL) {
140 printk(KERN_INFO "aoe: buf allocation failure\n"); 188 printk(KERN_INFO "aoe: buf allocation failure\n");
@@ -143,14 +191,14 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
143 } 191 }
144 memset(buf, 0, sizeof(*buf)); 192 memset(buf, 0, sizeof(*buf));
145 INIT_LIST_HEAD(&buf->bufs); 193 INIT_LIST_HEAD(&buf->bufs);
146 buf->start_time = jiffies; 194 buf->stime = jiffies;
147 buf->bio = bio; 195 buf->bio = bio;
148 buf->resid = bio->bi_size; 196 buf->resid = bio->bi_size;
149 buf->sector = bio->bi_sector; 197 buf->sector = bio->bi_sector;
150 buf->bv = &bio->bi_io_vec[bio->bi_idx]; 198 buf->bv = &bio->bi_io_vec[bio->bi_idx];
151 WARN_ON(buf->bv->bv_len == 0);
152 buf->bv_resid = buf->bv->bv_len; 199 buf->bv_resid = buf->bv->bv_len;
153 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; 200 WARN_ON(buf->bv_resid == 0);
201 buf->bv_off = buf->bv->bv_offset;
154 202
155 spin_lock_irqsave(&d->lock, flags); 203 spin_lock_irqsave(&d->lock, flags);
156 204
@@ -229,7 +277,7 @@ aoeblk_gdalloc(void *vp)
229 gd->fops = &aoe_bdops; 277 gd->fops = &aoe_bdops;
230 gd->private_data = d; 278 gd->private_data = d;
231 gd->capacity = d->ssize; 279 gd->capacity = d->ssize;
232 snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%ld", 280 snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
233 d->aoemajor, d->aoeminor); 281 d->aoemajor, d->aoeminor);
234 282
235 gd->queue = &d->blkq; 283 gd->queue = &d->blkq;
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index d5480e34cb22..03c7f4ab5624 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -6,6 +6,7 @@
6 6
7#include <linux/hdreg.h> 7#include <linux/hdreg.h>
8#include <linux/blkdev.h> 8#include <linux/blkdev.h>
9#include <linux/delay.h>
9#include "aoe.h" 10#include "aoe.h"
10 11
11enum { 12enum {
@@ -68,6 +69,7 @@ revalidate(const char __user *str, size_t size)
68 int major, minor, n; 69 int major, minor, n;
69 ulong flags; 70 ulong flags;
70 struct aoedev *d; 71 struct aoedev *d;
72 struct sk_buff *skb;
71 char buf[16]; 73 char buf[16];
72 74
73 if (size >= sizeof buf) 75 if (size >= sizeof buf)
@@ -85,13 +87,20 @@ revalidate(const char __user *str, size_t size)
85 d = aoedev_by_aoeaddr(major, minor); 87 d = aoedev_by_aoeaddr(major, minor);
86 if (!d) 88 if (!d)
87 return -EINVAL; 89 return -EINVAL;
88
89 spin_lock_irqsave(&d->lock, flags); 90 spin_lock_irqsave(&d->lock, flags);
90 d->flags &= ~DEVFL_MAXBCNT; 91 aoecmd_cleanslate(d);
91 d->flags |= DEVFL_PAUSE; 92loop:
93 skb = aoecmd_ata_id(d);
92 spin_unlock_irqrestore(&d->lock, flags); 94 spin_unlock_irqrestore(&d->lock, flags);
95 /* try again if we are able to sleep a bit,
96 * otherwise give up this revalidation
97 */
98 if (!skb && !msleep_interruptible(200)) {
99 spin_lock_irqsave(&d->lock, flags);
100 goto loop;
101 }
102 aoenet_xmit(skb);
93 aoecmd_cfg(major, minor); 103 aoecmd_cfg(major, minor);
94
95 return 0; 104 return 0;
96} 105}
97 106
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 4d59d5057734..5e7daa1ff6f6 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -9,19 +9,16 @@
9#include <linux/skbuff.h> 9#include <linux/skbuff.h>
10#include <linux/netdevice.h> 10#include <linux/netdevice.h>
11#include <linux/genhd.h> 11#include <linux/genhd.h>
12#include <linux/moduleparam.h>
12#include <net/net_namespace.h> 13#include <net/net_namespace.h>
13#include <asm/unaligned.h> 14#include <asm/unaligned.h>
14#include "aoe.h" 15#include "aoe.h"
15 16
16#define TIMERTICK (HZ / 10)
17#define MINTIMER (2 * TIMERTICK)
18#define MAXTIMER (HZ << 1)
19
20static int aoe_deadsecs = 60 * 3; 17static int aoe_deadsecs = 60 * 3;
21module_param(aoe_deadsecs, int, 0644); 18module_param(aoe_deadsecs, int, 0644);
22MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev."); 19MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
23 20
24struct sk_buff * 21static struct sk_buff *
25new_skb(ulong len) 22new_skb(ulong len)
26{ 23{
27 struct sk_buff *skb; 24 struct sk_buff *skb;
@@ -43,12 +40,12 @@ new_skb(ulong len)
43} 40}
44 41
45static struct frame * 42static struct frame *
46getframe(struct aoedev *d, int tag) 43getframe(struct aoetgt *t, int tag)
47{ 44{
48 struct frame *f, *e; 45 struct frame *f, *e;
49 46
50 f = d->frames; 47 f = t->frames;
51 e = f + d->nframes; 48 e = f + t->nframes;
52 for (; f<e; f++) 49 for (; f<e; f++)
53 if (f->tag == tag) 50 if (f->tag == tag)
54 return f; 51 return f;
@@ -61,21 +58,21 @@ getframe(struct aoedev *d, int tag)
61 * This driver reserves tag -1 to mean "unused frame." 58 * This driver reserves tag -1 to mean "unused frame."
62 */ 59 */
63static int 60static int
64newtag(struct aoedev *d) 61newtag(struct aoetgt *t)
65{ 62{
66 register ulong n; 63 register ulong n;
67 64
68 n = jiffies & 0xffff; 65 n = jiffies & 0xffff;
69 return n |= (++d->lasttag & 0x7fff) << 16; 66 return n |= (++t->lasttag & 0x7fff) << 16;
70} 67}
71 68
72static int 69static int
73aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) 70aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
74{ 71{
75 u32 host_tag = newtag(d); 72 u32 host_tag = newtag(t);
76 73
77 memcpy(h->src, d->ifp->dev_addr, sizeof h->src); 74 memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
78 memcpy(h->dst, d->addr, sizeof h->dst); 75 memcpy(h->dst, t->addr, sizeof h->dst);
79 h->type = __constant_cpu_to_be16(ETH_P_AOE); 76 h->type = __constant_cpu_to_be16(ETH_P_AOE);
80 h->verfl = AOE_HVER; 77 h->verfl = AOE_HVER;
81 h->major = cpu_to_be16(d->aoemajor); 78 h->major = cpu_to_be16(d->aoemajor);
@@ -98,42 +95,103 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
98} 95}
99 96
100static void 97static void
101aoecmd_ata_rw(struct aoedev *d, struct frame *f) 98ifrotate(struct aoetgt *t)
99{
100 t->ifp++;
101 if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
102 t->ifp = t->ifs;
103 if (t->ifp->nd == NULL) {
104 printk(KERN_INFO "aoe: no interface to rotate to\n");
105 BUG();
106 }
107}
108
109static struct frame *
110freeframe(struct aoedev *d)
102{ 111{
112 struct frame *f, *e;
113 struct aoetgt **t;
114 ulong n;
115
116 if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
117 printk(KERN_ERR "aoe: NULL TARGETS!\n");
118 return NULL;
119 }
120 t = d->targets;
121 do {
122 if (t != d->htgt
123 && (*t)->ifp->nd
124 && (*t)->nout < (*t)->maxout) {
125 n = (*t)->nframes;
126 f = (*t)->frames;
127 e = f + n;
128 for (; f < e; f++) {
129 if (f->tag != FREETAG)
130 continue;
131 if (atomic_read(&skb_shinfo(f->skb)->dataref)
132 != 1) {
133 n--;
134 continue;
135 }
136 skb_shinfo(f->skb)->nr_frags = 0;
137 f->skb->data_len = 0;
138 skb_trim(f->skb, 0);
139 d->tgt = t;
140 ifrotate(*t);
141 return f;
142 }
143 if (n == 0) /* slow polling network card */
144 d->flags |= DEVFL_KICKME;
145 }
146 t++;
147 } while (t < &d->targets[NTARGETS] && *t);
148 return NULL;
149}
150
151static int
152aoecmd_ata_rw(struct aoedev *d)
153{
154 struct frame *f;
103 struct aoe_hdr *h; 155 struct aoe_hdr *h;
104 struct aoe_atahdr *ah; 156 struct aoe_atahdr *ah;
105 struct buf *buf; 157 struct buf *buf;
158 struct bio_vec *bv;
159 struct aoetgt *t;
106 struct sk_buff *skb; 160 struct sk_buff *skb;
107 ulong bcnt; 161 ulong bcnt;
108 register sector_t sector;
109 char writebit, extbit; 162 char writebit, extbit;
110 163
111 writebit = 0x10; 164 writebit = 0x10;
112 extbit = 0x4; 165 extbit = 0x4;
113 166
167 f = freeframe(d);
168 if (f == NULL)
169 return 0;
170 t = *d->tgt;
114 buf = d->inprocess; 171 buf = d->inprocess;
115 172 bv = buf->bv;
116 sector = buf->sector; 173 bcnt = t->ifp->maxbcnt;
117 bcnt = buf->bv_resid; 174 if (bcnt == 0)
118 if (bcnt > d->maxbcnt) 175 bcnt = DEFAULTBCNT;
119 bcnt = d->maxbcnt; 176 if (bcnt > buf->bv_resid)
120 177 bcnt = buf->bv_resid;
121 /* initialize the headers & frame */ 178 /* initialize the headers & frame */
122 skb = f->skb; 179 skb = f->skb;
123 h = (struct aoe_hdr *) skb_mac_header(skb); 180 h = (struct aoe_hdr *) skb_mac_header(skb);
124 ah = (struct aoe_atahdr *) (h+1); 181 ah = (struct aoe_atahdr *) (h+1);
125 skb_put(skb, sizeof *h + sizeof *ah); 182 skb_put(skb, sizeof *h + sizeof *ah);
126 memset(h, 0, skb->len); 183 memset(h, 0, skb->len);
127 f->tag = aoehdr_atainit(d, h); 184 f->tag = aoehdr_atainit(d, t, h);
185 t->nout++;
128 f->waited = 0; 186 f->waited = 0;
129 f->buf = buf; 187 f->buf = buf;
130 f->bufaddr = buf->bufaddr; 188 f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
131 f->bcnt = bcnt; 189 f->bcnt = bcnt;
132 f->lba = sector; 190 f->lba = buf->sector;
133 191
134 /* set up ata header */ 192 /* set up ata header */
135 ah->scnt = bcnt >> 9; 193 ah->scnt = bcnt >> 9;
136 put_lba(ah, sector); 194 put_lba(ah, buf->sector);
137 if (d->flags & DEVFL_EXT) { 195 if (d->flags & DEVFL_EXT) {
138 ah->aflags |= AOEAFL_EXT; 196 ah->aflags |= AOEAFL_EXT;
139 } else { 197 } else {
@@ -141,14 +199,14 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
141 ah->lba3 &= 0x0f; 199 ah->lba3 &= 0x0f;
142 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */ 200 ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
143 } 201 }
144
145 if (bio_data_dir(buf->bio) == WRITE) { 202 if (bio_data_dir(buf->bio) == WRITE) {
146 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), 203 skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
147 offset_in_page(f->bufaddr), bcnt);
148 ah->aflags |= AOEAFL_WRITE; 204 ah->aflags |= AOEAFL_WRITE;
149 skb->len += bcnt; 205 skb->len += bcnt;
150 skb->data_len = bcnt; 206 skb->data_len = bcnt;
207 t->wpkts++;
151 } else { 208 } else {
209 t->rpkts++;
152 writebit = 0; 210 writebit = 0;
153 } 211 }
154 212
@@ -156,29 +214,29 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
156 214
157 /* mark all tracking fields and load out */ 215 /* mark all tracking fields and load out */
158 buf->nframesout += 1; 216 buf->nframesout += 1;
159 buf->bufaddr += bcnt; 217 buf->bv_off += bcnt;
160 buf->bv_resid -= bcnt; 218 buf->bv_resid -= bcnt;
161/* printk(KERN_DEBUG "aoe: bv_resid=%ld\n", buf->bv_resid); */
162 buf->resid -= bcnt; 219 buf->resid -= bcnt;
163 buf->sector += bcnt >> 9; 220 buf->sector += bcnt >> 9;
164 if (buf->resid == 0) { 221 if (buf->resid == 0) {
165 d->inprocess = NULL; 222 d->inprocess = NULL;
166 } else if (buf->bv_resid == 0) { 223 } else if (buf->bv_resid == 0) {
167 buf->bv++; 224 buf->bv = ++bv;
168 WARN_ON(buf->bv->bv_len == 0); 225 buf->bv_resid = bv->bv_len;
169 buf->bv_resid = buf->bv->bv_len; 226 WARN_ON(buf->bv_resid == 0);
170 buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset; 227 buf->bv_off = bv->bv_offset;
171 } 228 }
172 229
173 skb->dev = d->ifp; 230 skb->dev = t->ifp->nd;
174 skb = skb_clone(skb, GFP_ATOMIC); 231 skb = skb_clone(skb, GFP_ATOMIC);
175 if (skb == NULL) 232 if (skb) {
176 return; 233 if (d->sendq_hd)
177 if (d->sendq_hd) 234 d->sendq_tl->next = skb;
178 d->sendq_tl->next = skb; 235 else
179 else 236 d->sendq_hd = skb;
180 d->sendq_hd = skb; 237 d->sendq_tl = skb;
181 d->sendq_tl = skb; 238 }
239 return 1;
182} 240}
183 241
184/* some callers cannot sleep, and they can call this function, 242/* some callers cannot sleep, and they can call this function,
@@ -232,62 +290,8 @@ cont:
232 return sl; 290 return sl;
233} 291}
234 292
235static struct frame *
236freeframe(struct aoedev *d)
237{
238 struct frame *f, *e;
239 int n = 0;
240
241 f = d->frames;
242 e = f + d->nframes;
243 for (; f<e; f++) {
244 if (f->tag != FREETAG)
245 continue;
246 if (atomic_read(&skb_shinfo(f->skb)->dataref) == 1) {
247 skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0;
248 skb_trim(f->skb, 0);
249 return f;
250 }
251 n++;
252 }
253 if (n == d->nframes) /* wait for network layer */
254 d->flags |= DEVFL_KICKME;
255
256 return NULL;
257}
258
259/* enters with d->lock held */
260void
261aoecmd_work(struct aoedev *d)
262{
263 struct frame *f;
264 struct buf *buf;
265
266 if (d->flags & DEVFL_PAUSE) {
267 if (!aoedev_isbusy(d))
268 d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
269 d->aoeminor, &d->sendq_tl);
270 return;
271 }
272
273loop:
274 f = freeframe(d);
275 if (f == NULL)
276 return;
277 if (d->inprocess == NULL) {
278 if (list_empty(&d->bufq))
279 return;
280 buf = container_of(d->bufq.next, struct buf, bufs);
281 list_del(d->bufq.next);
282/*printk(KERN_DEBUG "aoe: bi_size=%ld\n", buf->bio->bi_size); */
283 d->inprocess = buf;
284 }
285 aoecmd_ata_rw(d, f);
286 goto loop;
287}
288
289static void 293static void
290rexmit(struct aoedev *d, struct frame *f) 294resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
291{ 295{
292 struct sk_buff *skb; 296 struct sk_buff *skb;
293 struct aoe_hdr *h; 297 struct aoe_hdr *h;
@@ -295,41 +299,45 @@ rexmit(struct aoedev *d, struct frame *f)
295 char buf[128]; 299 char buf[128];
296 u32 n; 300 u32 n;
297 301
298 n = newtag(d); 302 ifrotate(t);
303 n = newtag(t);
304 skb = f->skb;
305 h = (struct aoe_hdr *) skb_mac_header(skb);
306 ah = (struct aoe_atahdr *) (h+1);
299 307
300 snprintf(buf, sizeof buf, 308 snprintf(buf, sizeof buf,
301 "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n", 309 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x "
302 "retransmit", 310 "s=%012llx d=%012llx nout=%d\n",
303 d->aoemajor, d->aoeminor, f->tag, jiffies, n); 311 "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
312 mac_addr(h->src), mac_addr(h->dst), t->nout);
304 aoechr_error(buf); 313 aoechr_error(buf);
305 314
306 skb = f->skb;
307 h = (struct aoe_hdr *) skb_mac_header(skb);
308 ah = (struct aoe_atahdr *) (h+1);
309 f->tag = n; 315 f->tag = n;
310 h->tag = cpu_to_be32(n); 316 h->tag = cpu_to_be32(n);
311 memcpy(h->dst, d->addr, sizeof h->dst); 317 memcpy(h->dst, t->addr, sizeof h->dst);
312 memcpy(h->src, d->ifp->dev_addr, sizeof h->src); 318 memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
313 319
314 n = DEFAULTBCNT / 512; 320 switch (ah->cmdstat) {
315 if (ah->scnt > n) { 321 default:
316 ah->scnt = n; 322 break;
323 case WIN_READ:
324 case WIN_READ_EXT:
325 case WIN_WRITE:
326 case WIN_WRITE_EXT:
327 put_lba(ah, f->lba);
328
329 n = f->bcnt;
330 if (n > DEFAULTBCNT)
331 n = DEFAULTBCNT;
332 ah->scnt = n >> 9;
317 if (ah->aflags & AOEAFL_WRITE) { 333 if (ah->aflags & AOEAFL_WRITE) {
318 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr), 334 skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
319 offset_in_page(f->bufaddr), DEFAULTBCNT); 335 offset_in_page(f->bufaddr), n);
320 skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT; 336 skb->len = sizeof *h + sizeof *ah + n;
321 skb->data_len = DEFAULTBCNT; 337 skb->data_len = n;
322 }
323 if (++d->lostjumbo > (d->nframes << 1))
324 if (d->maxbcnt != DEFAULTBCNT) {
325 printk(KERN_INFO "aoe: e%ld.%ld: too many lost jumbo on %s - using 1KB frames.\n",
326 d->aoemajor, d->aoeminor, d->ifp->name);
327 d->maxbcnt = DEFAULTBCNT;
328 d->flags |= DEVFL_MAXBCNT;
329 } 338 }
330 } 339 }
331 340 skb->dev = t->ifp->nd;
332 skb->dev = d->ifp;
333 skb = skb_clone(skb, GFP_ATOMIC); 341 skb = skb_clone(skb, GFP_ATOMIC);
334 if (skb == NULL) 342 if (skb == NULL)
335 return; 343 return;
@@ -352,10 +360,92 @@ tsince(int tag)
352 return n; 360 return n;
353} 361}
354 362
363static struct aoeif *
364getif(struct aoetgt *t, struct net_device *nd)
365{
366 struct aoeif *p, *e;
367
368 p = t->ifs;
369 e = p + NAOEIFS;
370 for (; p < e; p++)
371 if (p->nd == nd)
372 return p;
373 return NULL;
374}
375
376static struct aoeif *
377addif(struct aoetgt *t, struct net_device *nd)
378{
379 struct aoeif *p;
380
381 p = getif(t, NULL);
382 if (!p)
383 return NULL;
384 p->nd = nd;
385 p->maxbcnt = DEFAULTBCNT;
386 p->lost = 0;
387 p->lostjumbo = 0;
388 return p;
389}
390
391static void
392ejectif(struct aoetgt *t, struct aoeif *ifp)
393{
394 struct aoeif *e;
395 ulong n;
396
397 e = t->ifs + NAOEIFS - 1;
398 n = (e - ifp) * sizeof *ifp;
399 memmove(ifp, ifp+1, n);
400 e->nd = NULL;
401}
402
403static int
404sthtith(struct aoedev *d)
405{
406 struct frame *f, *e, *nf;
407 struct sk_buff *skb;
408 struct aoetgt *ht = *d->htgt;
409
410 f = ht->frames;
411 e = f + ht->nframes;
412 for (; f < e; f++) {
413 if (f->tag == FREETAG)
414 continue;
415 nf = freeframe(d);
416 if (!nf)
417 return 0;
418 skb = nf->skb;
419 *nf = *f;
420 f->skb = skb;
421 f->tag = FREETAG;
422 nf->waited = 0;
423 ht->nout--;
424 (*d->tgt)->nout++;
425 resend(d, *d->tgt, nf);
426 }
427 /* he's clean, he's useless. take away his interfaces */
428 memset(ht->ifs, 0, sizeof ht->ifs);
429 d->htgt = NULL;
430 return 1;
431}
432
433static inline unsigned char
434ata_scnt(unsigned char *packet) {
435 struct aoe_hdr *h;
436 struct aoe_atahdr *ah;
437
438 h = (struct aoe_hdr *) packet;
439 ah = (struct aoe_atahdr *) (h+1);
440 return ah->scnt;
441}
442
355static void 443static void
356rexmit_timer(ulong vp) 444rexmit_timer(ulong vp)
357{ 445{
358 struct aoedev *d; 446 struct aoedev *d;
447 struct aoetgt *t, **tt, **te;
448 struct aoeif *ifp;
359 struct frame *f, *e; 449 struct frame *f, *e;
360 struct sk_buff *sl; 450 struct sk_buff *sl;
361 register long timeout; 451 register long timeout;
@@ -374,31 +464,79 @@ rexmit_timer(ulong vp)
374 spin_unlock_irqrestore(&d->lock, flags); 464 spin_unlock_irqrestore(&d->lock, flags);
375 return; 465 return;
376 } 466 }
377 f = d->frames; 467 tt = d->targets;
378 e = f + d->nframes; 468 te = tt + NTARGETS;
379 for (; f<e; f++) { 469 for (; tt < te && *tt; tt++) {
380 if (f->tag != FREETAG && tsince(f->tag) >= timeout) { 470 t = *tt;
471 f = t->frames;
472 e = f + t->nframes;
473 for (; f < e; f++) {
474 if (f->tag == FREETAG
475 || tsince(f->tag) < timeout)
476 continue;
381 n = f->waited += timeout; 477 n = f->waited += timeout;
382 n /= HZ; 478 n /= HZ;
383 if (n > aoe_deadsecs) { /* waited too long for response */ 479 if (n > aoe_deadsecs) {
480 /* waited too long. device failure. */
384 aoedev_downdev(d); 481 aoedev_downdev(d);
385 break; 482 break;
386 } 483 }
387 rexmit(d, f); 484
485 if (n > HELPWAIT /* see if another target can help */
486 && (tt != d->targets || d->targets[1]))
487 d->htgt = tt;
488
489 if (t->nout == t->maxout) {
490 if (t->maxout > 1)
491 t->maxout--;
492 t->lastwadj = jiffies;
493 }
494
495 ifp = getif(t, f->skb->dev);
496 if (ifp && ++ifp->lost > (t->nframes << 1)
497 && (ifp != t->ifs || t->ifs[1].nd)) {
498 ejectif(t, ifp);
499 ifp = NULL;
500 }
501
502 if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
503 && ifp && ++ifp->lostjumbo > (t->nframes << 1)
504 && ifp->maxbcnt != DEFAULTBCNT) {
505 printk(KERN_INFO
506 "aoe: e%ld.%d: "
507 "too many lost jumbo on "
508 "%s:%012llx - "
509 "falling back to %d frames.\n",
510 d->aoemajor, d->aoeminor,
511 ifp->nd->name, mac_addr(t->addr),
512 DEFAULTBCNT);
513 ifp->maxbcnt = 0;
514 }
515 resend(d, t, f);
516 }
517
518 /* window check */
519 if (t->nout == t->maxout
520 && t->maxout < t->nframes
521 && (jiffies - t->lastwadj)/HZ > 10) {
522 t->maxout++;
523 t->lastwadj = jiffies;
388 } 524 }
389 } 525 }
390 if (d->flags & DEVFL_KICKME) { 526
527 if (d->sendq_hd) {
528 n = d->rttavg <<= 1;
529 if (n > MAXTIMER)
530 d->rttavg = MAXTIMER;
531 }
532
533 if (d->flags & DEVFL_KICKME || d->htgt) {
391 d->flags &= ~DEVFL_KICKME; 534 d->flags &= ~DEVFL_KICKME;
392 aoecmd_work(d); 535 aoecmd_work(d);
393 } 536 }
394 537
395 sl = d->sendq_hd; 538 sl = d->sendq_hd;
396 d->sendq_hd = d->sendq_tl = NULL; 539 d->sendq_hd = d->sendq_tl = NULL;
397 if (sl) {
398 n = d->rttavg <<= 1;
399 if (n > MAXTIMER)
400 d->rttavg = MAXTIMER;
401 }
402 540
403 d->timer.expires = jiffies + TIMERTICK; 541 d->timer.expires = jiffies + TIMERTICK;
404 add_timer(&d->timer); 542 add_timer(&d->timer);
@@ -408,6 +546,25 @@ rexmit_timer(ulong vp)
408 aoenet_xmit(sl); 546 aoenet_xmit(sl);
409} 547}
410 548
549/* enters with d->lock held */
550void
551aoecmd_work(struct aoedev *d)
552{
553 struct buf *buf;
554loop:
555 if (d->htgt && !sthtith(d))
556 return;
557 if (d->inprocess == NULL) {
558 if (list_empty(&d->bufq))
559 return;
560 buf = container_of(d->bufq.next, struct buf, bufs);
561 list_del(d->bufq.next);
562 d->inprocess = buf;
563 }
564 if (aoecmd_ata_rw(d))
565 goto loop;
566}
567
411/* this function performs work that has been deferred until sleeping is OK 568/* this function performs work that has been deferred until sleeping is OK
412 */ 569 */
413void 570void
@@ -440,7 +597,7 @@ aoecmd_sleepwork(struct work_struct *work)
440} 597}
441 598
442static void 599static void
443ataid_complete(struct aoedev *d, unsigned char *id) 600ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
444{ 601{
445 u64 ssize; 602 u64 ssize;
446 u16 n; 603 u16 n;
@@ -476,7 +633,7 @@ ataid_complete(struct aoedev *d, unsigned char *id)
476 633
477 if (d->ssize != ssize) 634 if (d->ssize != ssize)
478 printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n", 635 printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu sectors\n",
479 (unsigned long long)mac_addr(d->addr), 636 (unsigned long long)mac_addr(t->addr),
480 d->aoemajor, d->aoeminor, 637 d->aoemajor, d->aoeminor,
481 d->fw_ver, (long long)ssize); 638 d->fw_ver, (long long)ssize);
482 d->ssize = ssize; 639 d->ssize = ssize;
@@ -484,15 +641,8 @@ ataid_complete(struct aoedev *d, unsigned char *id)
484 if (d->gd != NULL) { 641 if (d->gd != NULL) {
485 d->gd->capacity = ssize; 642 d->gd->capacity = ssize;
486 d->flags |= DEVFL_NEWSIZE; 643 d->flags |= DEVFL_NEWSIZE;
487 } else { 644 } else
488 if (d->flags & DEVFL_GDALLOC) {
489 printk(KERN_ERR "aoe: can't schedule work for e%lu.%lu, %s\n",
490 d->aoemajor, d->aoeminor,
491 "it's already on! This shouldn't happen.\n");
492 return;
493 }
494 d->flags |= DEVFL_GDALLOC; 645 d->flags |= DEVFL_GDALLOC;
495 }
496 schedule_work(&d->work); 646 schedule_work(&d->work);
497} 647}
498 648
@@ -519,6 +669,31 @@ calc_rttavg(struct aoedev *d, int rtt)
519 d->rttavg += n >> 2; 669 d->rttavg += n >> 2;
520} 670}
521 671
672static struct aoetgt *
673gettgt(struct aoedev *d, char *addr)
674{
675 struct aoetgt **t, **e;
676
677 t = d->targets;
678 e = t + NTARGETS;
679 for (; t < e && *t; t++)
680 if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
681 return *t;
682 return NULL;
683}
684
685static inline void
686diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
687{
688 unsigned long n_sect = bio->bi_size >> 9;
689 const int rw = bio_data_dir(bio);
690
691 disk_stat_inc(disk, ios[rw]);
692 disk_stat_add(disk, ticks[rw], duration);
693 disk_stat_add(disk, sectors[rw], n_sect);
694 disk_stat_add(disk, io_ticks, duration);
695}
696
522void 697void
523aoecmd_ata_rsp(struct sk_buff *skb) 698aoecmd_ata_rsp(struct sk_buff *skb)
524{ 699{
@@ -528,6 +703,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
528 struct frame *f; 703 struct frame *f;
529 struct buf *buf; 704 struct buf *buf;
530 struct sk_buff *sl; 705 struct sk_buff *sl;
706 struct aoetgt *t;
707 struct aoeif *ifp;
531 register long n; 708 register long n;
532 ulong flags; 709 ulong flags;
533 char ebuf[128]; 710 char ebuf[128];
@@ -547,7 +724,15 @@ aoecmd_ata_rsp(struct sk_buff *skb)
547 spin_lock_irqsave(&d->lock, flags); 724 spin_lock_irqsave(&d->lock, flags);
548 725
549 n = be32_to_cpu(get_unaligned(&hin->tag)); 726 n = be32_to_cpu(get_unaligned(&hin->tag));
550 f = getframe(d, n); 727 t = gettgt(d, hin->src);
728 if (t == NULL) {
729 printk(KERN_INFO "aoe: can't find target e%ld.%d:%012llx\n",
730 d->aoemajor, d->aoeminor,
731 (unsigned long long) mac_addr(hin->src));
732 spin_unlock_irqrestore(&d->lock, flags);
733 return;
734 }
735 f = getframe(t, n);
551 if (f == NULL) { 736 if (f == NULL) {
552 calc_rttavg(d, -tsince(n)); 737 calc_rttavg(d, -tsince(n));
553 spin_unlock_irqrestore(&d->lock, flags); 738 spin_unlock_irqrestore(&d->lock, flags);
@@ -569,8 +754,6 @@ aoecmd_ata_rsp(struct sk_buff *skb)
569 ahout = (struct aoe_atahdr *) (hout+1); 754 ahout = (struct aoe_atahdr *) (hout+1);
570 buf = f->buf; 755 buf = f->buf;
571 756
572 if (ahout->cmdstat == WIN_IDENTIFY)
573 d->flags &= ~DEVFL_PAUSE;
574 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */ 757 if (ahin->cmdstat & 0xa9) { /* these bits cleared on success */
575 printk(KERN_ERR 758 printk(KERN_ERR
576 "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n", 759 "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%ld\n",
@@ -579,14 +762,16 @@ aoecmd_ata_rsp(struct sk_buff *skb)
579 if (buf) 762 if (buf)
580 buf->flags |= BUFFL_FAIL; 763 buf->flags |= BUFFL_FAIL;
581 } else { 764 } else {
765 if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
766 d->htgt = NULL;
582 n = ahout->scnt << 9; 767 n = ahout->scnt << 9;
583 switch (ahout->cmdstat) { 768 switch (ahout->cmdstat) {
584 case WIN_READ: 769 case WIN_READ:
585 case WIN_READ_EXT: 770 case WIN_READ_EXT:
586 if (skb->len - sizeof *hin - sizeof *ahin < n) { 771 if (skb->len - sizeof *hin - sizeof *ahin < n) {
587 printk(KERN_ERR 772 printk(KERN_ERR
588 "aoe: runt data size in read. skb->len=%d\n", 773 "aoe: %s. skb->len=%d need=%ld\n",
589 skb->len); 774 "runt data size in read", skb->len, n);
590 /* fail frame f? just returning will rexmit. */ 775 /* fail frame f? just returning will rexmit. */
591 spin_unlock_irqrestore(&d->lock, flags); 776 spin_unlock_irqrestore(&d->lock, flags);
592 return; 777 return;
@@ -594,32 +779,18 @@ aoecmd_ata_rsp(struct sk_buff *skb)
594 memcpy(f->bufaddr, ahin+1, n); 779 memcpy(f->bufaddr, ahin+1, n);
595 case WIN_WRITE: 780 case WIN_WRITE:
596 case WIN_WRITE_EXT: 781 case WIN_WRITE_EXT:
782 ifp = getif(t, skb->dev);
783 if (ifp) {
784 ifp->lost = 0;
785 if (n > DEFAULTBCNT)
786 ifp->lostjumbo = 0;
787 }
597 if (f->bcnt -= n) { 788 if (f->bcnt -= n) {
598 skb = f->skb; 789 f->lba += n >> 9;
599 f->bufaddr += n; 790 f->bufaddr += n;
600 put_lba(ahout, f->lba += ahout->scnt); 791 resend(d, t, f);
601 n = f->bcnt; 792 goto xmit;
602 if (n > DEFAULTBCNT)
603 n = DEFAULTBCNT;
604 ahout->scnt = n >> 9;
605 if (ahout->aflags & AOEAFL_WRITE) {
606 skb_fill_page_desc(skb, 0,
607 virt_to_page(f->bufaddr),
608 offset_in_page(f->bufaddr), n);
609 skb->len = sizeof *hout + sizeof *ahout + n;
610 skb->data_len = n;
611 }
612 f->tag = newtag(d);
613 hout->tag = cpu_to_be32(f->tag);
614 skb->dev = d->ifp;
615 skb = skb_clone(skb, GFP_ATOMIC);
616 spin_unlock_irqrestore(&d->lock, flags);
617 if (skb)
618 aoenet_xmit(skb);
619 return;
620 } 793 }
621 if (n > DEFAULTBCNT)
622 d->lostjumbo = 0;
623 break; 794 break;
624 case WIN_IDENTIFY: 795 case WIN_IDENTIFY:
625 if (skb->len - sizeof *hin - sizeof *ahin < 512) { 796 if (skb->len - sizeof *hin - sizeof *ahin < 512) {
@@ -629,7 +800,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
629 spin_unlock_irqrestore(&d->lock, flags); 800 spin_unlock_irqrestore(&d->lock, flags);
630 return; 801 return;
631 } 802 }
632 ataid_complete(d, (char *) (ahin+1)); 803 ataid_complete(d, t, (char *) (ahin+1));
633 break; 804 break;
634 default: 805 default:
635 printk(KERN_INFO 806 printk(KERN_INFO
@@ -640,28 +811,19 @@ aoecmd_ata_rsp(struct sk_buff *skb)
640 } 811 }
641 } 812 }
642 813
643 if (buf) { 814 if (buf && --buf->nframesout == 0 && buf->resid == 0) {
644 buf->nframesout -= 1; 815 diskstats(d->gd, buf->bio, jiffies - buf->stime);
645 if (buf->nframesout == 0 && buf->resid == 0) { 816 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
646 unsigned long duration = jiffies - buf->start_time; 817 bio_endio(buf->bio, n);
647 unsigned long n_sect = buf->bio->bi_size >> 9; 818 mempool_free(buf, d->bufpool);
648 struct gendisk *disk = d->gd;
649 const int rw = bio_data_dir(buf->bio);
650
651 disk_stat_inc(disk, ios[rw]);
652 disk_stat_add(disk, ticks[rw], duration);
653 disk_stat_add(disk, sectors[rw], n_sect);
654 disk_stat_add(disk, io_ticks, duration);
655 n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
656 bio_endio(buf->bio, n);
657 mempool_free(buf, d->bufpool);
658 }
659 } 819 }
660 820
661 f->buf = NULL; 821 f->buf = NULL;
662 f->tag = FREETAG; 822 f->tag = FREETAG;
823 t->nout--;
663 824
664 aoecmd_work(d); 825 aoecmd_work(d);
826xmit:
665 sl = d->sendq_hd; 827 sl = d->sendq_hd;
666 d->sendq_hd = d->sendq_tl = NULL; 828 d->sendq_hd = d->sendq_tl = NULL;
667 829
@@ -679,23 +841,20 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
679 aoenet_xmit(sl); 841 aoenet_xmit(sl);
680} 842}
681 843
682/* 844struct sk_buff *
683 * Since we only call this in one place (and it only prepares one frame)
684 * we just return the skb. Usually we'd chain it up to the aoedev sendq.
685 */
686static struct sk_buff *
687aoecmd_ata_id(struct aoedev *d) 845aoecmd_ata_id(struct aoedev *d)
688{ 846{
689 struct aoe_hdr *h; 847 struct aoe_hdr *h;
690 struct aoe_atahdr *ah; 848 struct aoe_atahdr *ah;
691 struct frame *f; 849 struct frame *f;
692 struct sk_buff *skb; 850 struct sk_buff *skb;
851 struct aoetgt *t;
693 852
694 f = freeframe(d); 853 f = freeframe(d);
695 if (f == NULL) { 854 if (f == NULL)
696 printk(KERN_ERR "aoe: can't get a frame. This shouldn't happen.\n");
697 return NULL; 855 return NULL;
698 } 856
857 t = *d->tgt;
699 858
700 /* initialize the headers & frame */ 859 /* initialize the headers & frame */
701 skb = f->skb; 860 skb = f->skb;
@@ -703,7 +862,8 @@ aoecmd_ata_id(struct aoedev *d)
703 ah = (struct aoe_atahdr *) (h+1); 862 ah = (struct aoe_atahdr *) (h+1);
704 skb_put(skb, sizeof *h + sizeof *ah); 863 skb_put(skb, sizeof *h + sizeof *ah);
705 memset(h, 0, skb->len); 864 memset(h, 0, skb->len);
706 f->tag = aoehdr_atainit(d, h); 865 f->tag = aoehdr_atainit(d, t, h);
866 t->nout++;
707 f->waited = 0; 867 f->waited = 0;
708 868
709 /* set up ata header */ 869 /* set up ata header */
@@ -711,7 +871,7 @@ aoecmd_ata_id(struct aoedev *d)
711 ah->cmdstat = WIN_IDENTIFY; 871 ah->cmdstat = WIN_IDENTIFY;
712 ah->lba3 = 0xa0; 872 ah->lba3 = 0xa0;
713 873
714 skb->dev = d->ifp; 874 skb->dev = t->ifp->nd;
715 875
716 d->rttavg = MAXTIMER; 876 d->rttavg = MAXTIMER;
717 d->timer.function = rexmit_timer; 877 d->timer.function = rexmit_timer;
@@ -719,12 +879,58 @@ aoecmd_ata_id(struct aoedev *d)
719 return skb_clone(skb, GFP_ATOMIC); 879 return skb_clone(skb, GFP_ATOMIC);
720} 880}
721 881
882static struct aoetgt *
883addtgt(struct aoedev *d, char *addr, ulong nframes)
884{
885 struct aoetgt *t, **tt, **te;
886 struct frame *f, *e;
887
888 tt = d->targets;
889 te = tt + NTARGETS;
890 for (; tt < te && *tt; tt++)
891 ;
892
893 if (tt == te)
894 return NULL;
895
896 t = kcalloc(1, sizeof *t, GFP_ATOMIC);
897 f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
898 if (!t || !f)
899 goto bail;
900 t->nframes = nframes;
901 t->frames = f;
902 e = f + nframes;
903 for (; f < e; f++) {
904 f->tag = FREETAG;
905 f->skb = new_skb(ETH_ZLEN);
906 if (!f->skb)
907 break;
908 }
909 if (f != e) {
910 while (f > t->frames) {
911 f--;
912 dev_kfree_skb(f->skb);
913 }
914 goto bail;
915 }
916 memcpy(t->addr, addr, sizeof t->addr);
917 t->ifp = t->ifs;
918 t->maxout = t->nframes;
919 return *tt = t;
920bail:
921 kfree(t);
922 kfree(f);
923 return NULL;
924}
925
722void 926void
723aoecmd_cfg_rsp(struct sk_buff *skb) 927aoecmd_cfg_rsp(struct sk_buff *skb)
724{ 928{
725 struct aoedev *d; 929 struct aoedev *d;
726 struct aoe_hdr *h; 930 struct aoe_hdr *h;
727 struct aoe_cfghdr *ch; 931 struct aoe_cfghdr *ch;
932 struct aoetgt *t;
933 struct aoeif *ifp;
728 ulong flags, sysminor, aoemajor; 934 ulong flags, sysminor, aoemajor;
729 struct sk_buff *sl; 935 struct sk_buff *sl;
730 enum { MAXFRAMES = 16 }; 936 enum { MAXFRAMES = 16 };
@@ -755,7 +961,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
755 if (n > MAXFRAMES) /* keep it reasonable */ 961 if (n > MAXFRAMES) /* keep it reasonable */
756 n = MAXFRAMES; 962 n = MAXFRAMES;
757 963
758 d = aoedev_by_sysminor_m(sysminor, n); 964 d = aoedev_by_sysminor_m(sysminor);
759 if (d == NULL) { 965 if (d == NULL) {
760 printk(KERN_INFO "aoe: device sysminor_m failure\n"); 966 printk(KERN_INFO "aoe: device sysminor_m failure\n");
761 return; 967 return;
@@ -763,38 +969,77 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
763 969
764 spin_lock_irqsave(&d->lock, flags); 970 spin_lock_irqsave(&d->lock, flags);
765 971
766 /* permit device to migrate mac and network interface */ 972 t = gettgt(d, h->src);
767 d->ifp = skb->dev; 973 if (!t) {
768 memcpy(d->addr, h->src, sizeof d->addr); 974 t = addtgt(d, h->src, n);
769 if (!(d->flags & DEVFL_MAXBCNT)) { 975 if (!t) {
770 n = d->ifp->mtu; 976 printk(KERN_INFO
977 "aoe: device addtgt failure; "
978 "too many targets?\n");
979 spin_unlock_irqrestore(&d->lock, flags);
980 return;
981 }
982 }
983 ifp = getif(t, skb->dev);
984 if (!ifp) {
985 ifp = addif(t, skb->dev);
986 if (!ifp) {
987 printk(KERN_INFO
988 "aoe: device addif failure; "
989 "too many interfaces?\n");
990 spin_unlock_irqrestore(&d->lock, flags);
991 return;
992 }
993 }
994 if (ifp->maxbcnt) {
995 n = ifp->nd->mtu;
771 n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr); 996 n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
772 n /= 512; 997 n /= 512;
773 if (n > ch->scnt) 998 if (n > ch->scnt)
774 n = ch->scnt; 999 n = ch->scnt;
775 n = n ? n * 512 : DEFAULTBCNT; 1000 n = n ? n * 512 : DEFAULTBCNT;
776 if (n != d->maxbcnt) { 1001 if (n != ifp->maxbcnt) {
777 printk(KERN_INFO 1002 printk(KERN_INFO
778 "aoe: e%ld.%ld: setting %d byte data frames on %s\n", 1003 "aoe: e%ld.%d: setting %d%s%s:%012llx\n",
779 d->aoemajor, d->aoeminor, n, d->ifp->name); 1004 d->aoemajor, d->aoeminor, n,
780 d->maxbcnt = n; 1005 " byte data frames on ", ifp->nd->name,
1006 (unsigned long long) mac_addr(t->addr));
1007 ifp->maxbcnt = n;
781 } 1008 }
782 } 1009 }
783 1010
784 /* don't change users' perspective */ 1011 /* don't change users' perspective */
785 if (d->nopen && !(d->flags & DEVFL_PAUSE)) { 1012 if (d->nopen) {
786 spin_unlock_irqrestore(&d->lock, flags); 1013 spin_unlock_irqrestore(&d->lock, flags);
787 return; 1014 return;
788 } 1015 }
789 d->flags |= DEVFL_PAUSE; /* force pause */
790 d->mintimer = MINTIMER;
791 d->fw_ver = be16_to_cpu(ch->fwver); 1016 d->fw_ver = be16_to_cpu(ch->fwver);
792 1017
793 /* check for already outstanding ataid */ 1018 sl = aoecmd_ata_id(d);
794 sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
795 1019
796 spin_unlock_irqrestore(&d->lock, flags); 1020 spin_unlock_irqrestore(&d->lock, flags);
797 1021
798 aoenet_xmit(sl); 1022 aoenet_xmit(sl);
799} 1023}
800 1024
1025void
1026aoecmd_cleanslate(struct aoedev *d)
1027{
1028 struct aoetgt **t, **te;
1029 struct aoeif *p, *e;
1030
1031 d->mintimer = MINTIMER;
1032
1033 t = d->targets;
1034 te = t + NTARGETS;
1035 for (; t < te && *t; t++) {
1036 (*t)->maxout = (*t)->nframes;
1037 p = (*t)->ifs;
1038 e = p + NAOEIFS;
1039 for (; p < e; p++) {
1040 p->lostjumbo = 0;
1041 p->lost = 0;
1042 p->maxbcnt = DEFAULTBCNT;
1043 }
1044 }
1045}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 51f50710e5fc..a4d625aefeaa 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -15,15 +15,18 @@ static spinlock_t devlist_lock;
15int 15int
16aoedev_isbusy(struct aoedev *d) 16aoedev_isbusy(struct aoedev *d)
17{ 17{
18 struct aoetgt **t, **te;
18 struct frame *f, *e; 19 struct frame *f, *e;
19 20
20 f = d->frames; 21 t = d->targets;
21 e = f + d->nframes; 22 te = t + NTARGETS;
22 do { 23 for (; t < te && *t; t++) {
23 if (f->tag != FREETAG) 24 f = (*t)->frames;
24 return 1; 25 e = f + (*t)->nframes;
25 } while (++f < e); 26 for (; f < e; f++)
26 27 if (f->tag != FREETAG)
28 return 1;
29 }
27 return 0; 30 return 0;
28} 31}
29 32
@@ -55,75 +58,41 @@ dummy_timer(ulong vp)
55 add_timer(&d->timer); 58 add_timer(&d->timer);
56} 59}
57 60
58/* called with devlist lock held */
59static struct aoedev *
60aoedev_newdev(ulong nframes)
61{
62 struct aoedev *d;
63 struct frame *f, *e;
64
65 d = kzalloc(sizeof *d, GFP_ATOMIC);
66 f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
67 switch (!d || !f) {
68 case 0:
69 d->nframes = nframes;
70 d->frames = f;
71 e = f + nframes;
72 for (; f<e; f++) {
73 f->tag = FREETAG;
74 f->skb = new_skb(ETH_ZLEN);
75 if (!f->skb)
76 break;
77 }
78 if (f == e)
79 break;
80 while (f > d->frames) {
81 f--;
82 dev_kfree_skb(f->skb);
83 }
84 default:
85 if (f)
86 kfree(f);
87 if (d)
88 kfree(d);
89 return NULL;
90 }
91 INIT_WORK(&d->work, aoecmd_sleepwork);
92 spin_lock_init(&d->lock);
93 init_timer(&d->timer);
94 d->timer.data = (ulong) d;
95 d->timer.function = dummy_timer;
96 d->timer.expires = jiffies + HZ;
97 add_timer(&d->timer);
98 d->bufpool = NULL; /* defer to aoeblk_gdalloc */
99 INIT_LIST_HEAD(&d->bufq);
100 d->next = devlist;
101 devlist = d;
102
103 return d;
104}
105
106void 61void
107aoedev_downdev(struct aoedev *d) 62aoedev_downdev(struct aoedev *d)
108{ 63{
64 struct aoetgt **t, **te;
109 struct frame *f, *e; 65 struct frame *f, *e;
110 struct buf *buf; 66 struct buf *buf;
111 struct bio *bio; 67 struct bio *bio;
112 68
113 f = d->frames; 69 t = d->targets;
114 e = f + d->nframes; 70 te = t + NTARGETS;
115 for (; f<e; f->tag = FREETAG, f->buf = NULL, f++) { 71 for (; t < te && *t; t++) {
116 if (f->tag == FREETAG || f->buf == NULL) 72 f = (*t)->frames;
117 continue; 73 e = f + (*t)->nframes;
118 buf = f->buf; 74 for (; f < e; f->tag = FREETAG, f->buf = NULL, f++) {
119 bio = buf->bio; 75 if (f->tag == FREETAG || f->buf == NULL)
120 if (--buf->nframesout == 0) { 76 continue;
121 mempool_free(buf, d->bufpool); 77 buf = f->buf;
122 bio_endio(bio, -EIO); 78 bio = buf->bio;
79 if (--buf->nframesout == 0
80 && buf != d->inprocess) {
81 mempool_free(buf, d->bufpool);
82 bio_endio(bio, -EIO);
83 }
123 } 84 }
124 skb_shinfo(f->skb)->nr_frags = f->skb->data_len = 0; 85 (*t)->maxout = (*t)->nframes;
86 (*t)->nout = 0;
87 }
88 buf = d->inprocess;
89 if (buf) {
90 bio = buf->bio;
91 mempool_free(buf, d->bufpool);
92 bio_endio(bio, -EIO);
125 } 93 }
126 d->inprocess = NULL; 94 d->inprocess = NULL;
95 d->htgt = NULL;
127 96
128 while (!list_empty(&d->bufq)) { 97 while (!list_empty(&d->bufq)) {
129 buf = container_of(d->bufq.next, struct buf, bufs); 98 buf = container_of(d->bufq.next, struct buf, bufs);
@@ -136,12 +105,12 @@ aoedev_downdev(struct aoedev *d)
136 if (d->gd) 105 if (d->gd)
137 d->gd->capacity = 0; 106 d->gd->capacity = 0;
138 107
139 d->flags &= ~(DEVFL_UP | DEVFL_PAUSE); 108 d->flags &= ~DEVFL_UP;
140} 109}
141 110
142/* find it or malloc it */ 111/* find it or malloc it */
143struct aoedev * 112struct aoedev *
144aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt) 113aoedev_by_sysminor_m(ulong sysminor)
145{ 114{
146 struct aoedev *d; 115 struct aoedev *d;
147 ulong flags; 116 ulong flags;
@@ -151,40 +120,61 @@ aoedev_by_sysminor_m(ulong sysminor, ulong bufcnt)
151 for (d=devlist; d; d=d->next) 120 for (d=devlist; d; d=d->next)
152 if (d->sysminor == sysminor) 121 if (d->sysminor == sysminor)
153 break; 122 break;
154 123 if (d)
155 if (d == NULL) { 124 goto out;
156 d = aoedev_newdev(bufcnt); 125 d = kcalloc(1, sizeof *d, GFP_ATOMIC);
157 if (d == NULL) { 126 if (!d)
158 spin_unlock_irqrestore(&devlist_lock, flags); 127 goto out;
159 printk(KERN_INFO "aoe: aoedev_newdev failure.\n"); 128 INIT_WORK(&d->work, aoecmd_sleepwork);
160 return NULL; 129 spin_lock_init(&d->lock);
161 } 130 init_timer(&d->timer);
162 d->sysminor = sysminor; 131 d->timer.data = (ulong) d;
163 d->aoemajor = AOEMAJOR(sysminor); 132 d->timer.function = dummy_timer;
164 d->aoeminor = AOEMINOR(sysminor); 133 d->timer.expires = jiffies + HZ;
165 } 134 add_timer(&d->timer);
166 135 d->bufpool = NULL; /* defer to aoeblk_gdalloc */
136 d->tgt = d->targets;
137 INIT_LIST_HEAD(&d->bufq);
138 d->sysminor = sysminor;
139 d->aoemajor = AOEMAJOR(sysminor);
140 d->aoeminor = AOEMINOR(sysminor);
141 d->mintimer = MINTIMER;
142 d->next = devlist;
143 devlist = d;
144 out:
167 spin_unlock_irqrestore(&devlist_lock, flags); 145 spin_unlock_irqrestore(&devlist_lock, flags);
168 return d; 146 return d;
169} 147}
170 148
171static void 149static void
172aoedev_freedev(struct aoedev *d) 150freetgt(struct aoetgt *t)
173{ 151{
174 struct frame *f, *e; 152 struct frame *f, *e;
175 153
154 f = t->frames;
155 e = f + t->nframes;
156 for (; f < e; f++) {
157 skb_shinfo(f->skb)->nr_frags = 0;
158 dev_kfree_skb(f->skb);
159 }
160 kfree(t->frames);
161 kfree(t);
162}
163
164static void
165aoedev_freedev(struct aoedev *d)
166{
167 struct aoetgt **t, **e;
168
176 if (d->gd) { 169 if (d->gd) {
177 aoedisk_rm_sysfs(d); 170 aoedisk_rm_sysfs(d);
178 del_gendisk(d->gd); 171 del_gendisk(d->gd);
179 put_disk(d->gd); 172 put_disk(d->gd);
180 } 173 }
181 f = d->frames; 174 t = d->targets;
182 e = f + d->nframes; 175 e = t + NTARGETS;
183 for (; f<e; f++) { 176 for (; t < e && *t; t++)
184 skb_shinfo(f->skb)->nr_frags = 0; 177 freetgt(*t);
185 dev_kfree_skb(f->skb);
186 }
187 kfree(d->frames);
188 if (d->bufpool) 178 if (d->bufpool)
189 mempool_destroy(d->bufpool); 179 mempool_destroy(d->bufpool);
190 kfree(d); 180 kfree(d);
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index 4e6deb7f5c24..7a38a45ce110 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -137,9 +137,12 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt,
137 if (n > NECODES) 137 if (n > NECODES)
138 n = 0; 138 n = 0;
139 if (net_ratelimit()) 139 if (net_ratelimit())
140 printk(KERN_ERR "aoe: error packet from %d.%d; ecode=%d '%s'\n", 140 printk(KERN_ERR
141 be16_to_cpu(get_unaligned(&h->major)), h->minor, 141 "%s%d.%d@%s; ecode=%d '%s'\n",
142 h->err, aoe_errlist[n]); 142 "aoe: error packet from ",
143 be16_to_cpu(get_unaligned(&h->major)),
144 h->minor, skb->dev->name,
145 h->err, aoe_errlist[n]);
143 goto exit; 146 goto exit;
144 } 147 }
145 148