author		Ed Cashin <ecashin@coraid.com>	2012-10-04 20:16:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-05 14:05:25 -0400
commit		69cf2d85de773d998798e47e3335b85e5645d157 (patch)
tree		765eb2be45726e7e098fe73b7f368239c0461342
parent		896831f5909e2733c13c9cb13a1a215f10c3eaa8 (diff)
aoe: become I/O request queue handler for increased user control
To allow users to choose an elevator algorithm for their particular
workloads, change from a make_request-style driver to an
I/O-request-queue-handler-style driver.
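In concrete terms, a make_request driver receives each bio directly and bypasses
the elevator, while a request-queue handler lets the block layer queue, sort, and
merge I/O before calling back into the driver. A minimal sketch of the handler
pattern adopted here, against the 3.x-era block-layer API (the example_ names are
illustrative, not from the patch):

	#include <linux/blkdev.h>

	/* The block layer calls the handler with the queue lock held. */
	static void
	example_request(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_peek_request(q))) {
			blk_start_request(rq);	/* dequeue; driver owns rq now */
			/* hand rq to the driver's transmit machinery here */
		}
	}

	/* At init time, creating the queue with blk_init_queue() is what
	 * interposes the elevator:
	 *
	 *	q = blk_init_queue(example_request, &example_lock);
	 */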
We have to do a couple of things that might be surprising. We manipulate
the page _count directly, on the assumption that there is still no guarantee
that users of the block layer are prohibited from submitting bios
containing pages with zero reference counts.[1] If such a prohibition now
exists, I can get rid of the _count manipulation.
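The workaround in question is the bio_pageinc()/bio_pagedec() pair added to
aoecmd.c below. Abridged here, and written against the 2012-era struct page
whose reference count field was named _count:

	/* Bump each page's count before the network layer can put_page() it.
	 * get_page() cannot be used here because it insists on a positive
	 * count as a precondition.
	 */
	static void
	bio_pageinc(struct bio *bio)
	{
		struct bio_vec *bv;
		int i;

		bio_for_each_segment(bv, bio, i)
			atomic_inc(&bv->bv_page->_count);
	}

The full version in the patch additionally refuses to run on the tail pages of
compound pages.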
Just as before this patch, we keep track of the sk_buffs that the
network layer hasn't finished with yet, and we cap the resources we use with
a "pool" of skbs.[2]
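The pool is a plain sk_buff_head capped at NSKBPOOLMAX, which this patch raises
from 128 to 256. A sketch of the put-back side, assuming the caller holds
d->lock; this is illustrative, not the verbatim aoe code:

	static void
	skbpool_put(struct aoedev *d, struct sk_buff *skb)
	{
		if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX)
			__skb_queue_tail(&d->skbpool, skb);	/* keep for reuse */
		else
			dev_kfree_skb(skb);	/* over the cap: just free it */
	}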
Now that the block layer maintains the disk stats, the aoe driver's
diskstats function can go away.
1. https://lkml.org/lkml/2007/3/1/374
2. https://lkml.org/lkml/2007/7/6/241
Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	drivers/block/aoe/aoe.h		|  26
-rw-r--r--	drivers/block/aoe/aoeblk.c	|  88
-rw-r--r--	drivers/block/aoe/aoechr.c	|   1
-rw-r--r--	drivers/block/aoe/aoecmd.c	| 282
-rw-r--r--	drivers/block/aoe/aoedev.c	|  93
5 files changed, 308 insertions(+), 182 deletions(-)
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 0cd6c0f7a535..8c4f6d942e05 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -90,7 +90,7 @@ enum {
 	MIN_BUFS = 16,
 	NTARGETS = 8,
 	NAOEIFS = 8,
-	NSKBPOOLMAX = 128,
+	NSKBPOOLMAX = 256,
 	NFACTIVE = 17,
 
 	TIMERTICK = HZ / 10,
@@ -100,30 +100,26 @@ enum {
 };
 
 struct buf {
-	struct list_head bufs;
-	ulong stime;	/* for disk stats */
-	ulong flags;
 	ulong nframesout;
 	ulong resid;
 	ulong bv_resid;
-	ulong bv_off;
 	sector_t sector;
 	struct bio *bio;
 	struct bio_vec *bv;
+	struct request *rq;
 };
 
 struct frame {
 	struct list_head head;
 	u32 tag;
 	ulong waited;
-	struct buf *buf;
 	struct aoetgt *t;		/* parent target I belong to */
-	char *bufaddr;
-	ulong bcnt;
 	sector_t lba;
 	struct sk_buff *skb;		/* command skb freed on module exit */
 	struct sk_buff *r_skb;		/* response skb for async processing */
+	struct buf *buf;
 	struct bio_vec *bv;
+	ulong bcnt;
 	ulong bv_off;
 };
 
@@ -161,6 +157,7 @@ struct aoedev {
 	u16 rttavg;		/* round trip average of requests/responses */
 	u16 mintimer;
 	u16 fw_ver;		/* version of blade's firmware */
+	ulong ref;
 	struct work_struct work;/* disk create work struct */
 	struct gendisk *gd;
 	struct request_queue *blkq;
@@ -168,11 +165,13 @@ struct aoedev {
 	sector_t ssize;
 	struct timer_list timer;
 	spinlock_t lock;
-	struct sk_buff_head sendq;
 	struct sk_buff_head skbpool;
 	mempool_t *bufpool;	/* for deadlock-free Buf allocation */
-	struct list_head bufq;	/* queue of bios to work on */
-	struct buf *inprocess;	/* the one we're currently working on */
+	struct {		/* pointers to work in progress */
+		struct buf *buf;
+		struct bio *nxbio;
+		struct request *rq;
+	} ip;
 	struct aoetgt *targets[NTARGETS];
 	struct aoetgt **tgt;	/* target in use when working */
 	struct aoetgt *htgt;	/* target needing rexmit assistance */
@@ -209,6 +208,8 @@ void aoecmd_exit(void);
 int aoecmd_init(void);
 struct sk_buff *aoecmd_ata_id(struct aoedev *);
 void aoe_freetframe(struct frame *);
+void aoe_flush_iocq(void);
+void aoe_end_request(struct aoedev *, struct request *, int);
 
 int aoedev_init(void);
 void aoedev_exit(void);
@@ -216,7 +217,8 @@ struct aoedev *aoedev_by_aoeaddr(int maj, int min);
 struct aoedev *aoedev_by_sysminor_m(ulong sysminor);
 void aoedev_downdev(struct aoedev *d);
 int aoedev_flush(const char __user *str, size_t size);
-void aoe_failbuf(struct aoedev *d, struct buf *buf);
+void aoe_failbuf(struct aoedev *, struct buf *);
+void aoedev_put(struct aoedev *);
 
 int aoenet_init(void);
 void aoenet_exit(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 3a8f0933cc7d..7ec4b8fa28fd 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -161,68 +161,22 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
 }
 
 static void
-aoeblk_make_request(struct request_queue *q, struct bio *bio)
+aoeblk_request(struct request_queue *q)
 {
-	struct sk_buff_head queue;
 	struct aoedev *d;
-	struct buf *buf;
-	ulong flags;
-
-	blk_queue_bounce(q, &bio);
-
-	if (bio == NULL) {
-		printk(KERN_ERR "aoe: bio is NULL\n");
-		BUG();
-		return;
-	}
-	d = bio->bi_bdev->bd_disk->private_data;
-	if (d == NULL) {
-		printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
-		BUG();
-		bio_endio(bio, -ENXIO);
-		return;
-	} else if (bio->bi_io_vec == NULL) {
-		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
-		BUG();
-		bio_endio(bio, -ENXIO);
-		return;
-	}
-	buf = mempool_alloc(d->bufpool, GFP_NOIO);
-	if (buf == NULL) {
-		printk(KERN_INFO "aoe: buf allocation failure\n");
-		bio_endio(bio, -ENOMEM);
-		return;
-	}
-	memset(buf, 0, sizeof(*buf));
-	INIT_LIST_HEAD(&buf->bufs);
-	buf->stime = jiffies;
-	buf->bio = bio;
-	buf->resid = bio->bi_size;
-	buf->sector = bio->bi_sector;
-	buf->bv = &bio->bi_io_vec[bio->bi_idx];
-	buf->bv_resid = buf->bv->bv_len;
-	WARN_ON(buf->bv_resid == 0);
-	buf->bv_off = buf->bv->bv_offset;
-
-	spin_lock_irqsave(&d->lock, flags);
+	struct request *rq;
 
+	d = q->queuedata;
 	if ((d->flags & DEVFL_UP) == 0) {
 		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
 			d->aoemajor, d->aoeminor);
-		spin_unlock_irqrestore(&d->lock, flags);
-		mempool_free(buf, d->bufpool);
-		bio_endio(bio, -ENXIO);
+		while ((rq = blk_peek_request(q))) {
+			blk_start_request(rq);
+			aoe_end_request(d, rq, 1);
+		}
 		return;
 	}
-
-	list_add_tail(&buf->bufs, &d->bufq);
-
 	aoecmd_work(d);
-	__skb_queue_head_init(&queue);
-	skb_queue_splice_init(&d->sendq, &queue);
-
-	spin_unlock_irqrestore(&d->lock, flags);
-	aoenet_xmit(&queue);
 }
 
 static int
@@ -254,34 +208,46 @@ aoeblk_gdalloc(void *vp)
 {
 	struct aoedev *d = vp;
 	struct gendisk *gd;
-	enum { KB = 1024, MB = KB * KB, READ_AHEAD = MB, };
+	mempool_t *mp;
+	struct request_queue *q;
+	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
 	ulong flags;
 
 	gd = alloc_disk(AOE_PARTITIONS);
 	if (gd == NULL) {
-		printk(KERN_ERR
-			"aoe: cannot allocate disk structure for %ld.%d\n",
+		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
 		goto err;
 	}
 
-	d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
-	if (d->bufpool == NULL) {
+	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
+		buf_pool_cache);
+	if (mp == NULL) {
		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
			d->aoemajor, d->aoeminor);
		goto err_disk;
	}
+	q = blk_init_queue(aoeblk_request, &d->lock);
+	if (q == NULL) {
+		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
+			d->aoemajor, d->aoeminor);
+		mempool_destroy(mp);
+		goto err_disk;
+	}
 
 	d->blkq = blk_alloc_queue(GFP_KERNEL);
 	if (!d->blkq)
 		goto err_mempool;
-	blk_queue_make_request(d->blkq, aoeblk_make_request);
 	d->blkq->backing_dev_info.name = "aoe";
 	if (bdi_init(&d->blkq->backing_dev_info))
 		goto err_blkq;
 	spin_lock_irqsave(&d->lock, flags);
 	blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
-	d->blkq->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
+	d->bufpool = mp;
+	d->blkq = gd->queue = q;
+	q->queuedata = d;
+	d->gd = gd;
 	gd->major = AOE_MAJOR;
 	gd->first_minor = d->sysminor * AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
@@ -290,8 +256,6 @@ aoeblk_gdalloc(void *vp)
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
-	gd->queue = d->blkq;
-	d->gd = gd;
 	d->flags &= ~DEVFL_GDALLOC;
 	d->flags |= DEVFL_UP;
 
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index f145388cb94a..3557f0d04b46 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -106,6 +106,7 @@ loop:
 		spin_lock_irqsave(&d->lock, flags);
 		goto loop;
 	}
+	aoedev_put(d);
 	if (skb) {
 		struct sk_buff_head queue;
 		__skb_queue_head_init(&queue);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 59b333c902a6..5928a08c1f3f 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -23,6 +23,8 @@
 
 static void ktcomplete(struct frame *, struct sk_buff *);
 
+static struct buf *nextbuf(struct aoedev *);
+
 static int aoe_deadsecs = 60 * 3;
 module_param(aoe_deadsecs, int, 0644);
 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
@@ -283,17 +285,20 @@ aoecmd_ata_rw(struct aoedev *d)
 	struct bio_vec *bv;
 	struct aoetgt *t;
 	struct sk_buff *skb;
+	struct sk_buff_head queue;
 	ulong bcnt, fbcnt;
 	char writebit, extbit;
 
 	writebit = 0x10;
 	extbit = 0x4;
 
+	buf = nextbuf(d);
+	if (buf == NULL)
+		return 0;
 	f = newframe(d);
 	if (f == NULL)
 		return 0;
 	t = *d->tgt;
-	buf = d->inprocess;
 	bv = buf->bv;
 	bcnt = t->ifp->maxbcnt;
 	if (bcnt == 0)
@@ -312,7 +317,7 @@
 		fbcnt -= buf->bv_resid;
 		buf->resid -= buf->bv_resid;
 		if (buf->resid == 0) {
-			d->inprocess = NULL;
+			d->ip.buf = NULL;
 			break;
 		}
 		buf->bv++;
@@ -364,8 +369,11 @@
 
 	skb->dev = t->ifp->nd;
 	skb = skb_clone(skb, GFP_ATOMIC);
-	if (skb)
-		__skb_queue_tail(&d->sendq, skb);
+	if (skb) {
+		__skb_queue_head_init(&queue);
+		__skb_queue_tail(&queue, skb);
+		aoenet_xmit(&queue);
+	}
 	return 1;
 }
 
@@ -415,6 +423,7 @@ static void
 resend(struct aoedev *d, struct frame *f)
 {
 	struct sk_buff *skb;
+	struct sk_buff_head queue;
 	struct aoe_hdr *h;
 	struct aoe_atahdr *ah;
 	struct aoetgt *t;
@@ -444,7 +453,9 @@ resend(struct aoedev *d, struct frame *f)
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return;
-	__skb_queue_tail(&d->sendq, skb);
+	__skb_queue_head_init(&queue);
+	__skb_queue_tail(&queue, skb);
+	aoenet_xmit(&queue);
 }
 
 static int
@@ -554,7 +565,6 @@ ata_scnt(unsigned char *packet) {
 static void
 rexmit_timer(ulong vp)
 {
-	struct sk_buff_head queue;
 	struct aoedev *d;
 	struct aoetgt *t, **tt, **te;
 	struct aoeif *ifp;
@@ -603,6 +613,12 @@
 		}
 	}
 
+	if (!list_empty(&flist)) {	/* retransmissions necessary */
+		n = d->rttavg <<= 1;
+		if (n > MAXTIMER)
+			d->rttavg = MAXTIMER;
+	}
+
 	/* process expired frames */
 	while (!list_empty(&flist)) {
 		pos = flist.next;
@@ -641,45 +657,131 @@
 		resend(d, f);
 	}
 
-	if (!skb_queue_empty(&d->sendq)) {
-		n = d->rttavg <<= 1;
-		if (n > MAXTIMER)
-			d->rttavg = MAXTIMER;
-	}
-
-	if (d->flags & DEVFL_KICKME || d->htgt) {
+	if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
 		d->flags &= ~DEVFL_KICKME;
-		aoecmd_work(d);
+		d->blkq->request_fn(d->blkq);
 	}
 
-	__skb_queue_head_init(&queue);
-	skb_queue_splice_init(&d->sendq, &queue);
-
 	d->timer.expires = jiffies + TIMERTICK;
 	add_timer(&d->timer);
 
 	spin_unlock_irqrestore(&d->lock, flags);
+}
 
-	aoenet_xmit(&queue);
+static unsigned long
+rqbiocnt(struct request *r)
+{
+	struct bio *bio;
+	unsigned long n = 0;
+
+	__rq_for_each_bio(bio, r)
+		n++;
+	return n;
+}
+
+/* This can be removed if we are certain that no users of the block
+ * layer will ever use zero-count pages in bios.  Otherwise we have to
+ * protect against the put_page sometimes done by the network layer.
+ *
+ * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
+ * discussion.
+ *
+ * We cannot use get_page in the workaround, because it insists on a
+ * positive page count as a precondition.  So we use _count directly.
+ */
+static void
+bio_pageinc(struct bio *bio)
+{
+	struct bio_vec *bv;
+	struct page *page;
+	int i;
+
+	bio_for_each_segment(bv, bio, i) {
+		page = bv->bv_page;
+		/* Non-zero page count for non-head members of
+		 * compound pages is no longer allowed by the kernel,
+		 * but this has never been seen here.
+		 */
+		if (unlikely(PageCompound(page)))
+			if (compound_trans_head(page) != page) {
+				pr_crit("page tail used for block I/O\n");
+				BUG();
+			}
+		atomic_inc(&page->_count);
+	}
+}
+
+static void
+bio_pagedec(struct bio *bio)
+{
+	struct bio_vec *bv;
+	int i;
+
+	bio_for_each_segment(bv, bio, i)
+		atomic_dec(&bv->bv_page->_count);
+}
+
+static void
+bufinit(struct buf *buf, struct request *rq, struct bio *bio)
+{
+	struct bio_vec *bv;
+
+	memset(buf, 0, sizeof(*buf));
+	buf->rq = rq;
+	buf->bio = bio;
+	buf->resid = bio->bi_size;
+	buf->sector = bio->bi_sector;
+	bio_pageinc(bio);
+	buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];
+	buf->bv_resid = bv->bv_len;
+	WARN_ON(buf->bv_resid == 0);
+}
+
+static struct buf *
+nextbuf(struct aoedev *d)
+{
+	struct request *rq;
+	struct request_queue *q;
+	struct buf *buf;
+	struct bio *bio;
+
+	q = d->blkq;
+	if (q == NULL)
+		return NULL;	/* initializing */
+	if (d->ip.buf)
+		return d->ip.buf;
+	rq = d->ip.rq;
+	if (rq == NULL) {
+		rq = blk_peek_request(q);
+		if (rq == NULL)
+			return NULL;
+		blk_start_request(rq);
+		d->ip.rq = rq;
+		d->ip.nxbio = rq->bio;
+		rq->special = (void *) rqbiocnt(rq);
+	}
+	buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
+	if (buf == NULL) {
+		pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
+		return NULL;
+	}
+	bio = d->ip.nxbio;
+	bufinit(buf, rq, bio);
+	bio = bio->bi_next;
+	d->ip.nxbio = bio;
+	if (bio == NULL)
+		d->ip.rq = NULL;
+	return d->ip.buf = buf;
 }
 
 /* enters with d->lock held */
 void
 aoecmd_work(struct aoedev *d)
 {
-	struct buf *buf;
-loop:
 	if (d->htgt && !sthtith(d))
 		return;
-	if (d->inprocess == NULL) {
-		if (list_empty(&d->bufq))
-			return;
-		buf = container_of(d->bufq.next, struct buf, bufs);
-		list_del(d->bufq.next);
-		d->inprocess = buf;
-	}
-	if (aoecmd_ata_rw(d))
-		goto loop;
+	while (aoecmd_ata_rw(d))
+		;
 }
 
 /* this function performs work that has been deferred until sleeping is OK
@@ -802,25 +904,6 @@ gettgt(struct aoedev *d, char *addr)
 	return NULL;
 }
 
-static inline void
-diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
-{
-	unsigned long n_sect = bio->bi_size >> 9;
-	const int rw = bio_data_dir(bio);
-	struct hd_struct *part;
-	int cpu;
-
-	cpu = part_stat_lock();
-	part = disk_map_sector_rcu(disk, sector);
-
-	part_stat_inc(cpu, part, ios[rw]);
-	part_stat_add(cpu, part, ticks[rw], duration);
-	part_stat_add(cpu, part, sectors[rw], n_sect);
-	part_stat_add(cpu, part, io_ticks, duration);
-
-	part_stat_unlock();
-}
-
 static void
 bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
 {
@@ -842,6 +925,43 @@ loop:
 	goto loop;
 }
 
+void
+aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
+{
+	struct bio *bio;
+	int bok;
+	struct request_queue *q;
+
+	q = d->blkq;
+	if (rq == d->ip.rq)
+		d->ip.rq = NULL;
+	do {
+		bio = rq->bio;
+		bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
+	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size));
+
+	/* cf. http://lkml.org/lkml/2006/10/31/28 */
+	if (!fastfail)
+		q->request_fn(q);
+}
+
+static void
+aoe_end_buf(struct aoedev *d, struct buf *buf)
+{
+	struct request *rq;
+	unsigned long n;
+
+	if (buf == d->ip.buf)
+		d->ip.buf = NULL;
+	rq = buf->rq;
+	bio_pagedec(buf->bio);
+	mempool_free(buf, d->bufpool);
+	n = (unsigned long) rq->special;
+	rq->special = (void *) --n;
+	if (n == 0)
+		aoe_end_request(d, rq, 0);
+}
+
 static void
 ktiocomplete(struct frame *f)
 {
@@ -876,7 +996,7 @@ ktiocomplete(struct frame *f)
 			ahout->cmdstat, ahin->cmdstat,
 			d->aoemajor, d->aoeminor);
 noskb:	if (buf)
-		buf->flags |= BUFFL_FAIL;
+		clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
 		goto badrsp;
 	}
 
@@ -887,7 +1007,7 @@ noskb:	if (buf)
 		if (skb->len < n) {
 			pr_err("aoe: runt data size in read. skb->len=%d need=%ld\n",
 				skb->len, n);
-			buf->flags |= BUFFL_FAIL;
+			clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
 			break;
 		}
 		bvcpy(f->bv, f->bv_off, skb, n);
@@ -927,18 +1047,13 @@ badrsp:
 
 	aoe_freetframe(f);
 
-	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
-		struct bio *bio = buf->bio;
+	if (buf && --buf->nframesout == 0 && buf->resid == 0)
+		aoe_end_buf(d, buf);
 
-		diskstats(d->gd, bio, jiffies - buf->stime, buf->sector);
-		n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
-		mempool_free(buf, d->bufpool);
-		spin_unlock_irq(&d->lock);
-		if (n != -EIO)
-			bio_flush_dcache_pages(buf->bio);
-		bio_endio(bio, n);
-	} else
-		spin_unlock_irq(&d->lock);
+	aoecmd_work(d);
+
+	spin_unlock_irq(&d->lock);
+	aoedev_put(d);
 	dev_kfree_skb(skb);
 }
 
@@ -1061,12 +1176,14 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 		printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
 			d->aoemajor, d->aoeminor, h->src);
 		spin_unlock_irqrestore(&d->lock, flags);
+		aoedev_put(d);
 		return skb;
 	}
 	f = getframe(t, n);
 	if (f == NULL) {
 		calc_rttavg(d, -tsince(n));
 		spin_unlock_irqrestore(&d->lock, flags);
+		aoedev_put(d);
 		snprintf(ebuf, sizeof ebuf,
 			"%15s e%d.%d tag=%08x@%08lx\n",
 			"unexpected rsp",
@@ -1185,8 +1302,10 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
 	struct aoeif *ifp;
 	ulong flags, sysminor, aoemajor;
 	struct sk_buff *sl;
+	struct sk_buff_head queue;
 	u16 n;
 
+	sl = NULL;
 	h = (struct aoe_hdr *) skb_mac_header(skb);
 	ch = (struct aoe_cfghdr *) (h+1);
 
@@ -1223,10 +1342,8 @@
 	t = gettgt(d, h->src);
 	if (!t) {
 		t = addtgt(d, h->src, n);
-		if (!t) {
-			spin_unlock_irqrestore(&d->lock, flags);
-			return;
-		}
+		if (!t)
+			goto bail;
 	}
 	ifp = getif(t, skb->dev);
 	if (!ifp) {
@@ -1235,8 +1352,7 @@
 			printk(KERN_INFO
 				"aoe: device addif failure; "
 				"too many interfaces?\n");
-			spin_unlock_irqrestore(&d->lock, flags);
-			return;
+			goto bail;
 		}
 	}
 	if (ifp->maxbcnt) {
@@ -1257,18 +1373,14 @@
 	}
 
 	/* don't change users' perspective */
-	if (d->nopen) {
-		spin_unlock_irqrestore(&d->lock, flags);
-		return;
+	if (d->nopen == 0) {
+		d->fw_ver = be16_to_cpu(ch->fwver);
+		sl = aoecmd_ata_id(d);
 	}
-	d->fw_ver = be16_to_cpu(ch->fwver);
-
-	sl = aoecmd_ata_id(d);
-
+bail:
 	spin_unlock_irqrestore(&d->lock, flags);
-
+	aoedev_put(d);
 	if (sl) {
-		struct sk_buff_head queue;
 		__skb_queue_head_init(&queue);
 		__skb_queue_tail(&queue, sl);
 		aoenet_xmit(&queue);
@@ -1297,8 +1409,19 @@ aoecmd_cleanslate(struct aoedev *d)
 	}
 }
 
-static void
-flush_iocq(void)
+void
+aoe_failbuf(struct aoedev *d, struct buf *buf)
+{
+	if (buf == NULL)
+		return;
+	buf->resid = 0;
+	clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+	if (buf->nframesout == 0)
+		aoe_end_buf(d, buf);
+}
+
+void
+aoe_flush_iocq(void)
 {
 	struct frame *f;
 	struct aoedev *d;
@@ -1324,6 +1447,7 @@ flush_iocq(void)
 		aoe_freetframe(f);
 		spin_unlock_irqrestore(&d->lock, flags);
 		dev_kfree_skb(skb);
+		aoedev_put(d);
 	}
 }
 
@@ -1344,5 +1468,5 @@ void
 aoecmd_exit(void)
 {
 	aoe_ktstop(&kts);
-	flush_iocq();
+	aoe_flush_iocq();
 }
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 40bae1a1ff1e..635dc986cf77 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -19,6 +19,17 @@ static void skbpoolfree(struct aoedev *d);
 static struct aoedev *devlist;
 static DEFINE_SPINLOCK(devlist_lock);
 
+/*
+ * Users who grab a pointer to the device with aoedev_by_aoeaddr or
+ * aoedev_by_sysminor_m automatically get a reference count and must
+ * be responsible for performing a aoedev_put.  With the addition of
+ * async kthread processing I'm no longer confident that we can
+ * guarantee consistency in the face of device flushes.
+ *
+ * For the time being, we only bother to add extra references for
+ * frames sitting on the iocq.  When the kthreads finish processing
+ * these frames, they will aoedev_put the device.
+ */
 struct aoedev *
 aoedev_by_aoeaddr(int maj, int min)
 {
@@ -28,13 +39,25 @@ aoedev_by_aoeaddr(int maj, int min)
 	spin_lock_irqsave(&devlist_lock, flags);
 
 	for (d=devlist; d; d=d->next)
-		if (d->aoemajor == maj && d->aoeminor == min)
+		if (d->aoemajor == maj && d->aoeminor == min) {
+			d->ref++;
 			break;
+		}
 
 	spin_unlock_irqrestore(&devlist_lock, flags);
 	return d;
 }
 
+void
+aoedev_put(struct aoedev *d)
+{
+	ulong flags;
+
+	spin_lock_irqsave(&devlist_lock, flags);
+	d->ref--;
+	spin_unlock_irqrestore(&devlist_lock, flags);
+}
+
 static void
 dummy_timer(ulong vp)
 {
@@ -47,21 +70,26 @@ dummy_timer(ulong vp)
 	add_timer(&d->timer);
 }
 
-void
-aoe_failbuf(struct aoedev *d, struct buf *buf)
+static void
+aoe_failip(struct aoedev *d)
 {
+	struct request *rq;
 	struct bio *bio;
+	unsigned long n;
+
+	aoe_failbuf(d, d->ip.buf);
 
-	if (buf == NULL)
+	rq = d->ip.rq;
+	if (rq == NULL)
 		return;
-	buf->flags |= BUFFL_FAIL;
-	if (buf->nframesout == 0) {
-		if (buf == d->inprocess) /* ensure we only process this once */
-			d->inprocess = NULL;
-		bio = buf->bio;
-		mempool_free(buf, d->bufpool);
-		bio_endio(bio, -EIO);
+	while ((bio = d->ip.nxbio)) {
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+		d->ip.nxbio = bio->bi_next;
+		n = (unsigned long) rq->special;
+		rq->special = (void *) --n;
 	}
+	if ((unsigned long) rq->special == 0)
+		aoe_end_request(d, rq, 0);
 }
 
 void
@@ -70,8 +98,11 @@ aoedev_downdev(struct aoedev *d)
 	struct aoetgt *t, **tt, **te;
 	struct frame *f;
 	struct list_head *head, *pos, *nx;
+	struct request *rq;
 	int i;
 
+	d->flags &= ~DEVFL_UP;
+
 	/* clean out active buffers on all targets */
 	tt = d->targets;
 	te = tt + NTARGETS;
@@ -92,22 +123,20 @@
 		t->nout = 0;
 	}
 
-	/* clean out the in-process buffer (if any) */
-	aoe_failbuf(d, d->inprocess);
-	d->inprocess = NULL;
+	/* clean out the in-process request (if any) */
+	aoe_failip(d);
 	d->htgt = NULL;
 
-	/* clean out all pending I/O */
-	while (!list_empty(&d->bufq)) {
-		struct buf *buf = container_of(d->bufq.next, struct buf, bufs);
-		list_del(d->bufq.next);
-		aoe_failbuf(d, buf);
+	/* fast fail all pending I/O */
+	if (d->blkq) {
+		while ((rq = blk_peek_request(d->blkq))) {
+			blk_start_request(rq);
+			aoe_end_request(d, rq, 1);
+		}
 	}
 
 	if (d->gd)
 		set_capacity(d->gd, 0);
-
-	d->flags &= ~DEVFL_UP;
 }
 
 static void
@@ -120,6 +149,7 @@ aoedev_freedev(struct aoedev *d)
 		aoedisk_rm_sysfs(d);
 		del_gendisk(d->gd);
 		put_disk(d->gd);
+		blk_cleanup_queue(d->blkq);
 	}
 	t = d->targets;
 	e = t + NTARGETS;
@@ -128,7 +158,6 @@
 	if (d->bufpool)
 		mempool_destroy(d->bufpool);
 	skbpoolfree(d);
-	blk_cleanup_queue(d->blkq);
 	kfree(d);
 }
 
@@ -155,7 +184,8 @@ aoedev_flush(const char __user *str, size_t cnt)
 		spin_lock(&d->lock);
 		if ((!all && (d->flags & DEVFL_UP))
 		|| (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
-		|| d->nopen) {
+		|| d->nopen
+		|| d->ref) {
 			spin_unlock(&d->lock);
 			dd = &d->next;
 			continue;
@@ -176,12 +206,15 @@
 	return 0;
 }
 
-/* I'm not really sure that this is a realistic problem, but if the
-   network driver goes gonzo let's just leak memory after complaining. */
+/* This has been confirmed to occur once with Tms=3*1000 due to the
+ * driver changing link and not processing its transmit ring.  The
+ * problem is hard enough to solve by returning an error that I'm
+ * still punting on "solving" this.
+ */
 static void
 skbfree(struct sk_buff *skb)
 {
-	enum { Sms = 100, Tms = 3*1000};
+	enum { Sms = 250, Tms = 30 * 1000};
 	int i = Tms / Sms;
 
 	if (skb == NULL)
@@ -222,8 +255,10 @@ aoedev_by_sysminor_m(ulong sysminor)
 	spin_lock_irqsave(&devlist_lock, flags);
 
 	for (d=devlist; d; d=d->next)
-		if (d->sysminor == sysminor)
+		if (d->sysminor == sysminor) {
+			d->ref++;
 			break;
+		}
 	if (d)
 		goto out;
 	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
@@ -231,7 +266,6 @@
 		goto out;
 	INIT_WORK(&d->work, aoecmd_sleepwork);
 	spin_lock_init(&d->lock);
-	skb_queue_head_init(&d->sendq);
 	skb_queue_head_init(&d->skbpool);
 	init_timer(&d->timer);
 	d->timer.data = (ulong) d;
@@ -240,7 +274,7 @@
 	add_timer(&d->timer);
 	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
 	d->tgt = d->targets;
-	INIT_LIST_HEAD(&d->bufq);
+	d->ref = 1;
 	d->sysminor = sysminor;
 	d->aoemajor = AOEMAJOR(sysminor);
 	d->aoeminor = AOEMINOR(sysminor);
@@ -274,6 +308,7 @@ aoedev_exit(void)
 	struct aoedev *d;
 	ulong flags;
 
+	aoe_flush_iocq();
 	while ((d = devlist)) {
 		devlist = d->next;
 