author     Ed Cashin <ecashin@coraid.com>  2012-10-04 20:16:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-05 14:05:25 -0400
commit     69cf2d85de773d998798e47e3335b85e5645d157 (patch)
tree       765eb2be45726e7e098fe73b7f368239c0461342 /drivers/block/aoe/aoeblk.c
parent     896831f5909e2733c13c9cb13a1a215f10c3eaa8 (diff)
aoe: become I/O request queue handler for increased user control
To allow users to choose an elevator algorithm for their particular
workloads, change from a make_request-style driver to an
I/O-request-queue-handler-style driver.
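For orientation, here is a minimal sketch, not taken from the patch, of how the two driver styles register with the pre-blk-mq block layer. The example_* names are hypothetical; only the block-layer calls are real.

/*
 * Sketch only: contrast of the two driver styles in the legacy
 * (pre-blk-mq) block layer.  The example_* names are made up.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

/* bio-based: the driver sees each bio directly; no elevator runs */
static void example_make_request(struct request_queue *q, struct bio *bio)
{
	bio_endio(bio, 0);	/* complete immediately, just for the sketch */
}

/* request-based: the elevator sorts and merges before we peek */
static void example_request_fn(struct request_queue *q)
{
	struct request *rq;

	/* called with the queue lock held */
	while ((rq = blk_peek_request(q)) != NULL) {
		blk_start_request(rq);
		__blk_end_request_all(rq, 0);	/* complete immediately */
	}
}

/* old aoe style: the driver handles bios itself */
static struct request_queue *example_setup_bio_based(void)
{
	struct request_queue *q = blk_alloc_queue(GFP_KERNEL);

	if (q)
		blk_queue_make_request(q, example_make_request);
	return q;
}

/* new aoe style: the block layer queues requests and applies the
 * user-selected elevator (noop, deadline, cfq) before calling us */
static struct request_queue *example_setup_request_based(spinlock_t *lock)
{
	return blk_init_queue(example_request_fn, lock);
}

The request_fn loop above mirrors what the patch's aoeblk_request() does in the diff below.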
We have to do a couple of things that might be surprising. We manipulate
the page _count directly on the assumption that we still have no guarantee
that users of the block layer are prohibited from submitting bios
containing pages with zero reference counts.[1] If such a prohibition now
exists, I can get rid of the _count manipulation.
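A rough sketch of that defensive refcounting, assuming the 3.7-era bio iteration API (the patch's real helpers live in drivers/block/aoe/aoecmd.c; the example_* names are illustrative):

#include <linux/bio.h>
#include <linux/mm.h>

/* Take an extra reference on every page in the bio so it cannot be
 * freed while the network layer may still hold an skb pointing at it. */
static void example_bio_pageinc(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		/* compound pages are refcounted on the head page */
		struct page *page = compound_trans_head(bv->bv_page);

		atomic_inc(&page->_count);
	}
}

/* Drop the extra references once the I/O is fully completed. */
static void example_bio_pagedec(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	bio_for_each_segment(bv, bio, i) {
		struct page *page = compound_trans_head(bv->bv_page);

		atomic_dec(&page->_count);
	}
}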
Just as before this patch, we keep track of the sk_buffs that the
network layer has not yet finished with, and we cap the resources we
use with a "pool" of skbs.[2]
Now that the block layer maintains the disk stats, the aoe driver's
diskstats function can go away.
1. https://lkml.org/lkml/2007/3/1/374
2. https://lkml.org/lkml/2007/7/6/241
Signed-off-by: Ed Cashin <ecashin@coraid.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/block/aoe/aoeblk.c')
-rw-r--r--  drivers/block/aoe/aoeblk.c | 88
1 file changed, 26 insertions(+), 62 deletions(-)
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 3a8f0933cc7d..7ec4b8fa28fd 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -161,68 +161,22 @@ aoeblk_release(struct gendisk *disk, fmode_t mode)
 }
 
 static void
-aoeblk_make_request(struct request_queue *q, struct bio *bio)
+aoeblk_request(struct request_queue *q)
 {
-	struct sk_buff_head queue;
 	struct aoedev *d;
-	struct buf *buf;
-	ulong flags;
-
-	blk_queue_bounce(q, &bio);
-
-	if (bio == NULL) {
-		printk(KERN_ERR "aoe: bio is NULL\n");
-		BUG();
-		return;
-	}
-	d = bio->bi_bdev->bd_disk->private_data;
-	if (d == NULL) {
-		printk(KERN_ERR "aoe: bd_disk->private_data is NULL\n");
-		BUG();
-		bio_endio(bio, -ENXIO);
-		return;
-	} else if (bio->bi_io_vec == NULL) {
-		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
-		BUG();
-		bio_endio(bio, -ENXIO);
-		return;
-	}
-	buf = mempool_alloc(d->bufpool, GFP_NOIO);
-	if (buf == NULL) {
-		printk(KERN_INFO "aoe: buf allocation failure\n");
-		bio_endio(bio, -ENOMEM);
-		return;
-	}
-	memset(buf, 0, sizeof(*buf));
-	INIT_LIST_HEAD(&buf->bufs);
-	buf->stime = jiffies;
-	buf->bio = bio;
-	buf->resid = bio->bi_size;
-	buf->sector = bio->bi_sector;
-	buf->bv = &bio->bi_io_vec[bio->bi_idx];
-	buf->bv_resid = buf->bv->bv_len;
-	WARN_ON(buf->bv_resid == 0);
-	buf->bv_off = buf->bv->bv_offset;
-
-	spin_lock_irqsave(&d->lock, flags);
+	struct request *rq;
 
+	d = q->queuedata;
 	if ((d->flags & DEVFL_UP) == 0) {
 		pr_info_ratelimited("aoe: device %ld.%d is not up\n",
 			d->aoemajor, d->aoeminor);
-		spin_unlock_irqrestore(&d->lock, flags);
-		mempool_free(buf, d->bufpool);
-		bio_endio(bio, -ENXIO);
+		while ((rq = blk_peek_request(q))) {
+			blk_start_request(rq);
+			aoe_end_request(d, rq, 1);
+		}
 		return;
 	}
-
-	list_add_tail(&buf->bufs, &d->bufq);
-
 	aoecmd_work(d);
-	__skb_queue_head_init(&queue);
-	skb_queue_splice_init(&d->sendq, &queue);
-
-	spin_unlock_irqrestore(&d->lock, flags);
-	aoenet_xmit(&queue);
 }
 
 static int
@@ -254,34 +208,46 @@ aoeblk_gdalloc(void *vp)
 {
 	struct aoedev *d = vp;
 	struct gendisk *gd;
-	enum { KB = 1024, MB = KB * KB, READ_AHEAD = MB, };
+	mempool_t *mp;
+	struct request_queue *q;
+	enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
 	ulong flags;
 
 	gd = alloc_disk(AOE_PARTITIONS);
 	if (gd == NULL) {
-		printk(KERN_ERR
-			"aoe: cannot allocate disk structure for %ld.%d\n",
+		pr_err("aoe: cannot allocate disk structure for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
 		goto err;
 	}
 
-	d->bufpool = mempool_create_slab_pool(MIN_BUFS, buf_pool_cache);
-	if (d->bufpool == NULL) {
+	mp = mempool_create(MIN_BUFS, mempool_alloc_slab, mempool_free_slab,
+		buf_pool_cache);
+	if (mp == NULL) {
 		printk(KERN_ERR "aoe: cannot allocate bufpool for %ld.%d\n",
 			d->aoemajor, d->aoeminor);
 		goto err_disk;
 	}
+	q = blk_init_queue(aoeblk_request, &d->lock);
+	if (q == NULL) {
+		pr_err("aoe: cannot allocate block queue for %ld.%d\n",
+			d->aoemajor, d->aoeminor);
+		mempool_destroy(mp);
+		goto err_disk;
+	}
 
 	d->blkq = blk_alloc_queue(GFP_KERNEL);
 	if (!d->blkq)
 		goto err_mempool;
-	blk_queue_make_request(d->blkq, aoeblk_make_request);
 	d->blkq->backing_dev_info.name = "aoe";
 	if (bdi_init(&d->blkq->backing_dev_info))
 		goto err_blkq;
 	spin_lock_irqsave(&d->lock, flags);
 	blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
-	d->blkq->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
+	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
+	d->bufpool = mp;
+	d->blkq = gd->queue = q;
+	q->queuedata = d;
+	d->gd = gd;
 	gd->major = AOE_MAJOR;
 	gd->first_minor = d->sysminor * AOE_PARTITIONS;
 	gd->fops = &aoe_bdops;
@@ -290,8 +256,6 @@ aoeblk_gdalloc(void *vp)
 	snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
 		d->aoemajor, d->aoeminor);
 
-	gd->queue = d->blkq;
-	d->gd = gd;
 	d->flags &= ~DEVFL_GDALLOC;
 	d->flags |= DEVFL_UP;
 