diff options
Diffstat (limited to 'block/blk-core.c')
-rw-r--r-- | block/blk-core.c | 1255 |
1 files changed, 8 insertions, 1247 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 937f9d0b9bd5..2c73ed1a8131 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
21 | #include <linux/string.h> | 21 | #include <linux/string.h> |
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/bootmem.h> /* for max_pfn/max_low_pfn */ | ||
24 | #include <linux/completion.h> | 23 | #include <linux/completion.h> |
25 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
26 | #include <linux/swap.h> | 25 | #include <linux/swap.h> |
@@ -34,20 +33,9 @@ | |||
34 | 33 | ||
35 | #include "blk.h" | 34 | #include "blk.h" |
36 | 35 | ||
37 | /* | ||
38 | * for max sense size | ||
39 | */ | ||
40 | #include <scsi/scsi_cmnd.h> | ||
41 | |||
42 | static void blk_unplug_work(struct work_struct *work); | ||
43 | static void blk_unplug_timeout(unsigned long data); | ||
44 | static void drive_stat_acct(struct request *rq, int new_io); | 36 | static void drive_stat_acct(struct request *rq, int new_io); |
45 | static void init_request_from_bio(struct request *req, struct bio *bio); | ||
46 | static int __make_request(struct request_queue *q, struct bio *bio); | 37 | static int __make_request(struct request_queue *q, struct bio *bio); |
47 | static struct io_context *current_io_context(gfp_t gfp_flags, int node); | ||
48 | static void blk_recalc_rq_segments(struct request *rq); | 38 | static void blk_recalc_rq_segments(struct request *rq); |
49 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | ||
50 | struct bio *bio); | ||
51 | 39 | ||
52 | /* | 40 | /* |
53 | * For the allocated request tables | 41 | * For the allocated request tables |
@@ -60,28 +48,12 @@ struct kmem_cache *request_cachep; | |||
60 | struct kmem_cache *blk_requestq_cachep = NULL; | 48 | struct kmem_cache *blk_requestq_cachep = NULL; |
61 | 49 | ||
62 | /* | 50 | /* |
63 | * For io context allocations | ||
64 | */ | ||
65 | static struct kmem_cache *iocontext_cachep; | ||
66 | |||
67 | /* | ||
68 | * Controlling structure to kblockd | 51 | * Controlling structure to kblockd |
69 | */ | 52 | */ |
70 | static struct workqueue_struct *kblockd_workqueue; | 53 | static struct workqueue_struct *kblockd_workqueue; |
71 | 54 | ||
72 | unsigned long blk_max_low_pfn, blk_max_pfn; | ||
73 | |||
74 | EXPORT_SYMBOL(blk_max_low_pfn); | ||
75 | EXPORT_SYMBOL(blk_max_pfn); | ||
76 | |||
77 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | 55 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); |
78 | 56 | ||
79 | /* Amount of time in which a process may batch requests */ | ||
80 | #define BLK_BATCH_TIME (HZ/50UL) | ||
81 | |||
82 | /* Number of requests a "batching" process may submit */ | ||
83 | #define BLK_BATCH_REQ 32 | ||
84 | |||
85 | void blk_queue_congestion_threshold(struct request_queue *q) | 57 | void blk_queue_congestion_threshold(struct request_queue *q) |
86 | { | 58 | { |
87 | int nr; | 59 | int nr; |
@@ -117,113 +89,7 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) | |||
117 | } | 89 | } |
118 | EXPORT_SYMBOL(blk_get_backing_dev_info); | 90 | EXPORT_SYMBOL(blk_get_backing_dev_info); |
119 | 91 | ||
120 | /** | 92 | void rq_init(struct request_queue *q, struct request *rq) |
121 | * blk_queue_prep_rq - set a prepare_request function for queue | ||
122 | * @q: queue | ||
123 | * @pfn: prepare_request function | ||
124 | * | ||
125 | * It's possible for a queue to register a prepare_request callback which | ||
126 | * is invoked before the request is handed to the request_fn. The goal of | ||
127 | * the function is to prepare a request for I/O, it can be used to build a | ||
128 | * cdb from the request data for instance. | ||
129 | * | ||
130 | */ | ||
131 | void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | ||
132 | { | ||
133 | q->prep_rq_fn = pfn; | ||
134 | } | ||
135 | |||
136 | EXPORT_SYMBOL(blk_queue_prep_rq); | ||
137 | |||
138 | /** | ||
139 | * blk_queue_merge_bvec - set a merge_bvec function for queue | ||
140 | * @q: queue | ||
141 | * @mbfn: merge_bvec_fn | ||
142 | * | ||
143 | * Usually queues have static limitations on the max sectors or segments that | ||
144 | * we can put in a request. Stacking drivers may have some settings that | ||
145 | * are dynamic, and thus we have to query the queue whether it is ok to | ||
146 | * add a new bio_vec to a bio at a given offset or not. If the block device | ||
147 | * has such limitations, it needs to register a merge_bvec_fn to control | ||
148 | * the size of bio's sent to it. Note that a block device *must* allow a | ||
149 | * single page to be added to an empty bio. The block device driver may want | ||
150 | * to use the bio_split() function to deal with these bio's. By default | ||
151 | * no merge_bvec_fn is defined for a queue, and only the fixed limits are | ||
152 | * honored. | ||
153 | */ | ||
154 | void blk_queue_merge_bvec(struct request_queue *q, merge_bvec_fn *mbfn) | ||
155 | { | ||
156 | q->merge_bvec_fn = mbfn; | ||
157 | } | ||
158 | |||
159 | EXPORT_SYMBOL(blk_queue_merge_bvec); | ||
160 | |||
161 | void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) | ||
162 | { | ||
163 | q->softirq_done_fn = fn; | ||
164 | } | ||
165 | |||
166 | EXPORT_SYMBOL(blk_queue_softirq_done); | ||
167 | |||
168 | /** | ||
169 | * blk_queue_make_request - define an alternate make_request function for a device | ||
170 | * @q: the request queue for the device to be affected | ||
171 | * @mfn: the alternate make_request function | ||
172 | * | ||
173 | * Description: | ||
174 | * The normal way for &struct bios to be passed to a device | ||
175 | * driver is for them to be collected into requests on a request | ||
176 | * queue, and then to allow the device driver to select requests | ||
177 | * off that queue when it is ready. This works well for many block | ||
178 | * devices. However some block devices (typically virtual devices | ||
179 | * such as md or lvm) do not benefit from the processing on the | ||
180 | * request queue, and are served best by having the requests passed | ||
181 | * directly to them. This can be achieved by providing a function | ||
182 | * to blk_queue_make_request(). | ||
183 | * | ||
184 | * Caveat: | ||
185 | * The driver that does this *must* be able to deal appropriately | ||
186 | * with buffers in "highmemory". This can be accomplished by either calling | ||
187 | * __bio_kmap_atomic() to get a temporary kernel mapping, or by calling | ||
188 | * blk_queue_bounce() to create a buffer in normal memory. | ||
189 | **/ | ||
190 | void blk_queue_make_request(struct request_queue * q, make_request_fn * mfn) | ||
191 | { | ||
192 | /* | ||
193 | * set defaults | ||
194 | */ | ||
195 | q->nr_requests = BLKDEV_MAX_RQ; | ||
196 | blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); | ||
197 | blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); | ||
198 | q->make_request_fn = mfn; | ||
199 | q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | ||
200 | q->backing_dev_info.state = 0; | ||
201 | q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; | ||
202 | blk_queue_max_sectors(q, SAFE_MAX_SECTORS); | ||
203 | blk_queue_hardsect_size(q, 512); | ||
204 | blk_queue_dma_alignment(q, 511); | ||
205 | blk_queue_congestion_threshold(q); | ||
206 | q->nr_batching = BLK_BATCH_REQ; | ||
207 | |||
208 | q->unplug_thresh = 4; /* hmm */ | ||
209 | q->unplug_delay = (3 * HZ) / 1000; /* 3 milliseconds */ | ||
210 | if (q->unplug_delay == 0) | ||
211 | q->unplug_delay = 1; | ||
212 | |||
213 | INIT_WORK(&q->unplug_work, blk_unplug_work); | ||
214 | |||
215 | q->unplug_timer.function = blk_unplug_timeout; | ||
216 | q->unplug_timer.data = (unsigned long)q; | ||
217 | |||
218 | /* | ||
219 | * by default assume old behaviour and bounce for any highmem page | ||
220 | */ | ||
221 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | ||
222 | } | ||
223 | |||
224 | EXPORT_SYMBOL(blk_queue_make_request); | ||
225 | |||
226 | static void rq_init(struct request_queue *q, struct request *rq) | ||
227 | { | 93 | { |
228 | INIT_LIST_HEAD(&rq->queuelist); | 94 | INIT_LIST_HEAD(&rq->queuelist); |
229 | INIT_LIST_HEAD(&rq->donelist); | 95 | INIT_LIST_HEAD(&rq->donelist); |
@@ -247,255 +113,6 @@ static void rq_init(struct request_queue *q, struct request *rq) | |||
247 | rq->next_rq = NULL; | 113 | rq->next_rq = NULL; |
248 | } | 114 | } |
249 | 115 | ||
250 | /** | ||
251 | * blk_queue_ordered - does this queue support ordered writes | ||
252 | * @q: the request queue | ||
253 | * @ordered: one of QUEUE_ORDERED_* | ||
254 | * @prepare_flush_fn: rq setup helper for cache flush ordered writes | ||
255 | * | ||
256 | * Description: | ||
257 | * For journalled file systems, doing ordered writes on a commit | ||
258 | * block instead of explicitly doing wait_on_buffer (which is bad | ||
259 | * for performance) can be a big win. Block drivers supporting this | ||
260 | * feature should call this function and indicate so. | ||
261 | * | ||
262 | **/ | ||
263 | int blk_queue_ordered(struct request_queue *q, unsigned ordered, | ||
264 | prepare_flush_fn *prepare_flush_fn) | ||
265 | { | ||
266 | if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && | ||
267 | prepare_flush_fn == NULL) { | ||
268 | printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); | ||
269 | return -EINVAL; | ||
270 | } | ||
271 | |||
272 | if (ordered != QUEUE_ORDERED_NONE && | ||
273 | ordered != QUEUE_ORDERED_DRAIN && | ||
274 | ordered != QUEUE_ORDERED_DRAIN_FLUSH && | ||
275 | ordered != QUEUE_ORDERED_DRAIN_FUA && | ||
276 | ordered != QUEUE_ORDERED_TAG && | ||
277 | ordered != QUEUE_ORDERED_TAG_FLUSH && | ||
278 | ordered != QUEUE_ORDERED_TAG_FUA) { | ||
279 | printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); | ||
280 | return -EINVAL; | ||
281 | } | ||
282 | |||
283 | q->ordered = ordered; | ||
284 | q->next_ordered = ordered; | ||
285 | q->prepare_flush_fn = prepare_flush_fn; | ||
286 | |||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | EXPORT_SYMBOL(blk_queue_ordered); | ||
291 | |||
292 | /* | ||
293 | * Cache flushing for ordered writes handling | ||
294 | */ | ||
295 | inline unsigned blk_ordered_cur_seq(struct request_queue *q) | ||
296 | { | ||
297 | if (!q->ordseq) | ||
298 | return 0; | ||
299 | return 1 << ffz(q->ordseq); | ||
300 | } | ||
301 | |||
302 | unsigned blk_ordered_req_seq(struct request *rq) | ||
303 | { | ||
304 | struct request_queue *q = rq->q; | ||
305 | |||
306 | BUG_ON(q->ordseq == 0); | ||
307 | |||
308 | if (rq == &q->pre_flush_rq) | ||
309 | return QUEUE_ORDSEQ_PREFLUSH; | ||
310 | if (rq == &q->bar_rq) | ||
311 | return QUEUE_ORDSEQ_BAR; | ||
312 | if (rq == &q->post_flush_rq) | ||
313 | return QUEUE_ORDSEQ_POSTFLUSH; | ||
314 | |||
315 | /* | ||
316 | * !fs requests don't need to follow barrier ordering. Always | ||
317 | * put them at the front. This fixes the following deadlock. | ||
318 | * | ||
319 | * http://thread.gmane.org/gmane.linux.kernel/537473 | ||
320 | */ | ||
321 | if (!blk_fs_request(rq)) | ||
322 | return QUEUE_ORDSEQ_DRAIN; | ||
323 | |||
324 | if ((rq->cmd_flags & REQ_ORDERED_COLOR) == | ||
325 | (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) | ||
326 | return QUEUE_ORDSEQ_DRAIN; | ||
327 | else | ||
328 | return QUEUE_ORDSEQ_DONE; | ||
329 | } | ||
330 | |||
331 | void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) | ||
332 | { | ||
333 | struct request *rq; | ||
334 | |||
335 | if (error && !q->orderr) | ||
336 | q->orderr = error; | ||
337 | |||
338 | BUG_ON(q->ordseq & seq); | ||
339 | q->ordseq |= seq; | ||
340 | |||
341 | if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) | ||
342 | return; | ||
343 | |||
344 | /* | ||
345 | * Okay, sequence complete. | ||
346 | */ | ||
347 | q->ordseq = 0; | ||
348 | rq = q->orig_bar_rq; | ||
349 | |||
350 | if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) | ||
351 | BUG(); | ||
352 | } | ||
353 | |||
354 | static void pre_flush_end_io(struct request *rq, int error) | ||
355 | { | ||
356 | elv_completed_request(rq->q, rq); | ||
357 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); | ||
358 | } | ||
359 | |||
360 | static void bar_end_io(struct request *rq, int error) | ||
361 | { | ||
362 | elv_completed_request(rq->q, rq); | ||
363 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); | ||
364 | } | ||
365 | |||
366 | static void post_flush_end_io(struct request *rq, int error) | ||
367 | { | ||
368 | elv_completed_request(rq->q, rq); | ||
369 | blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); | ||
370 | } | ||
371 | |||
372 | static void queue_flush(struct request_queue *q, unsigned which) | ||
373 | { | ||
374 | struct request *rq; | ||
375 | rq_end_io_fn *end_io; | ||
376 | |||
377 | if (which == QUEUE_ORDERED_PREFLUSH) { | ||
378 | rq = &q->pre_flush_rq; | ||
379 | end_io = pre_flush_end_io; | ||
380 | } else { | ||
381 | rq = &q->post_flush_rq; | ||
382 | end_io = post_flush_end_io; | ||
383 | } | ||
384 | |||
385 | rq->cmd_flags = REQ_HARDBARRIER; | ||
386 | rq_init(q, rq); | ||
387 | rq->elevator_private = NULL; | ||
388 | rq->elevator_private2 = NULL; | ||
389 | rq->rq_disk = q->bar_rq.rq_disk; | ||
390 | rq->end_io = end_io; | ||
391 | q->prepare_flush_fn(q, rq); | ||
392 | |||
393 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | ||
394 | } | ||
395 | |||
396 | static inline struct request *start_ordered(struct request_queue *q, | ||
397 | struct request *rq) | ||
398 | { | ||
399 | q->orderr = 0; | ||
400 | q->ordered = q->next_ordered; | ||
401 | q->ordseq |= QUEUE_ORDSEQ_STARTED; | ||
402 | |||
403 | /* | ||
404 | * Prep proxy barrier request. | ||
405 | */ | ||
406 | blkdev_dequeue_request(rq); | ||
407 | q->orig_bar_rq = rq; | ||
408 | rq = &q->bar_rq; | ||
409 | rq->cmd_flags = 0; | ||
410 | rq_init(q, rq); | ||
411 | if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) | ||
412 | rq->cmd_flags |= REQ_RW; | ||
413 | if (q->ordered & QUEUE_ORDERED_FUA) | ||
414 | rq->cmd_flags |= REQ_FUA; | ||
415 | rq->elevator_private = NULL; | ||
416 | rq->elevator_private2 = NULL; | ||
417 | init_request_from_bio(rq, q->orig_bar_rq->bio); | ||
418 | rq->end_io = bar_end_io; | ||
419 | |||
420 | /* | ||
421 | * Queue ordered sequence. As we stack them at the head, we | ||
422 | * need to queue in reverse order. Note that we rely on that | ||
423 | * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs | ||
424 | * request gets in between the ordered sequence. If this request is | |
425 | * an empty barrier, we don't need to do a postflush ever since | ||
426 | * there will be no data written between the pre and post flush. | ||
427 | * Hence a single flush will suffice. | ||
428 | */ | ||
429 | if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq)) | ||
430 | queue_flush(q, QUEUE_ORDERED_POSTFLUSH); | ||
431 | else | ||
432 | q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; | ||
433 | |||
434 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | ||
435 | |||
436 | if (q->ordered & QUEUE_ORDERED_PREFLUSH) { | ||
437 | queue_flush(q, QUEUE_ORDERED_PREFLUSH); | ||
438 | rq = &q->pre_flush_rq; | ||
439 | } else | ||
440 | q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; | ||
441 | |||
442 | if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) | ||
443 | q->ordseq |= QUEUE_ORDSEQ_DRAIN; | ||
444 | else | ||
445 | rq = NULL; | ||
446 | |||
447 | return rq; | ||
448 | } | ||
449 | |||
450 | int blk_do_ordered(struct request_queue *q, struct request **rqp) | ||
451 | { | ||
452 | struct request *rq = *rqp; | ||
453 | const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); | ||
454 | |||
455 | if (!q->ordseq) { | ||
456 | if (!is_barrier) | ||
457 | return 1; | ||
458 | |||
459 | if (q->next_ordered != QUEUE_ORDERED_NONE) { | ||
460 | *rqp = start_ordered(q, rq); | ||
461 | return 1; | ||
462 | } else { | ||
463 | /* | ||
464 | * This can happen when the queue switches to | ||
465 | * ORDERED_NONE while this request is on it. | ||
466 | */ | ||
467 | blkdev_dequeue_request(rq); | ||
468 | if (__blk_end_request(rq, -EOPNOTSUPP, | ||
469 | blk_rq_bytes(rq))) | ||
470 | BUG(); | ||
471 | *rqp = NULL; | ||
472 | return 0; | ||
473 | } | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * Ordered sequence in progress | ||
478 | */ | ||
479 | |||
480 | /* Special requests are not subject to ordering rules. */ | ||
481 | if (!blk_fs_request(rq) && | ||
482 | rq != &q->pre_flush_rq && rq != &q->post_flush_rq) | ||
483 | return 1; | ||
484 | |||
485 | if (q->ordered & QUEUE_ORDERED_TAG) { | ||
486 | /* Ordered by tag. Blocking the next barrier is enough. */ | ||
487 | if (is_barrier && rq != &q->bar_rq) | ||
488 | *rqp = NULL; | ||
489 | } else { | ||
490 | /* Ordered by draining. Wait for turn. */ | ||
491 | WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); | ||
492 | if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) | ||
493 | *rqp = NULL; | ||
494 | } | ||
495 | |||
496 | return 1; | ||
497 | } | ||
498 | |||
499 | static void req_bio_endio(struct request *rq, struct bio *bio, | 116 | static void req_bio_endio(struct request *rq, struct bio *bio, |
500 | unsigned int nbytes, int error) | 117 | unsigned int nbytes, int error) |
501 | { | 118 | { |
@@ -528,279 +145,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio, | |||
528 | } | 145 | } |
529 | } | 146 | } |
530 | 147 | ||
531 | /** | ||
532 | * blk_queue_bounce_limit - set bounce buffer limit for queue | ||
533 | * @q: the request queue for the device | ||
534 | * @dma_addr: bus address limit | ||
535 | * | ||
536 | * Description: | ||
537 | * Different hardware can have different requirements as to what pages | ||
538 | * it can do I/O directly to. A low level driver can call | ||
539 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | ||
540 | * buffers for doing I/O to pages residing above @dma_addr. | |
541 | **/ | ||
542 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) | ||
543 | { | ||
544 | unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT; | ||
545 | int dma = 0; | ||
546 | |||
547 | q->bounce_gfp = GFP_NOIO; | ||
548 | #if BITS_PER_LONG == 64 | ||
549 | /* Assume anything <= 4GB can be handled by IOMMU. | ||
550 | Actually some IOMMUs can handle everything, but I don't | ||
551 | know of a way to test this here. */ | ||
552 | if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) | ||
553 | dma = 1; | ||
554 | q->bounce_pfn = max_low_pfn; | ||
555 | #else | ||
556 | if (bounce_pfn < blk_max_low_pfn) | ||
557 | dma = 1; | ||
558 | q->bounce_pfn = bounce_pfn; | ||
559 | #endif | ||
560 | if (dma) { | ||
561 | init_emergency_isa_pool(); | ||
562 | q->bounce_gfp = GFP_NOIO | GFP_DMA; | ||
563 | q->bounce_pfn = bounce_pfn; | ||
564 | } | ||
565 | } | ||
566 | |||
567 | EXPORT_SYMBOL(blk_queue_bounce_limit); | ||
568 | |||
569 | /** | ||
570 | * blk_queue_max_sectors - set max sectors for a request for this queue | ||
571 | * @q: the request queue for the device | ||
572 | * @max_sectors: max sectors in the usual 512b unit | ||
573 | * | ||
574 | * Description: | ||
575 | * Enables a low level driver to set an upper limit on the size of | ||
576 | * received requests. | ||
577 | **/ | ||
578 | void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) | ||
579 | { | ||
580 | if ((max_sectors << 9) < PAGE_CACHE_SIZE) { | ||
581 | max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); | ||
582 | printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors); | ||
583 | } | ||
584 | |||
585 | if (BLK_DEF_MAX_SECTORS > max_sectors) | ||
586 | q->max_hw_sectors = q->max_sectors = max_sectors; | ||
587 | else { | ||
588 | q->max_sectors = BLK_DEF_MAX_SECTORS; | ||
589 | q->max_hw_sectors = max_sectors; | ||
590 | } | ||
591 | } | ||
592 | |||
593 | EXPORT_SYMBOL(blk_queue_max_sectors); | ||
594 | |||
595 | /** | ||
596 | * blk_queue_max_phys_segments - set max phys segments for a request for this queue | ||
597 | * @q: the request queue for the device | ||
598 | * @max_segments: max number of segments | ||
599 | * | ||
600 | * Description: | ||
601 | * Enables a low level driver to set an upper limit on the number of | ||
602 | * physical data segments in a request. This would be the largest sized | ||
603 | * scatter list the driver could handle. | ||
604 | **/ | ||
605 | void blk_queue_max_phys_segments(struct request_queue *q, | ||
606 | unsigned short max_segments) | ||
607 | { | ||
608 | if (!max_segments) { | ||
609 | max_segments = 1; | ||
610 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | ||
611 | } | ||
612 | |||
613 | q->max_phys_segments = max_segments; | ||
614 | } | ||
615 | |||
616 | EXPORT_SYMBOL(blk_queue_max_phys_segments); | ||
617 | |||
618 | /** | ||
619 | * blk_queue_max_hw_segments - set max hw segments for a request for this queue | ||
620 | * @q: the request queue for the device | ||
621 | * @max_segments: max number of segments | ||
622 | * | ||
623 | * Description: | ||
624 | * Enables a low level driver to set an upper limit on the number of | ||
625 | * hw data segments in a request. This would be the largest number of | ||
626 | * address/length pairs the host adapter can actually give at once | |
627 | * to the device. | ||
628 | **/ | ||
629 | void blk_queue_max_hw_segments(struct request_queue *q, | ||
630 | unsigned short max_segments) | ||
631 | { | ||
632 | if (!max_segments) { | ||
633 | max_segments = 1; | ||
634 | printk("%s: set to minimum %d\n", __FUNCTION__, max_segments); | ||
635 | } | ||
636 | |||
637 | q->max_hw_segments = max_segments; | ||
638 | } | ||
639 | |||
640 | EXPORT_SYMBOL(blk_queue_max_hw_segments); | ||
641 | |||
642 | /** | ||
643 | * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg | ||
644 | * @q: the request queue for the device | ||
645 | * @max_size: max size of segment in bytes | ||
646 | * | ||
647 | * Description: | ||
648 | * Enables a low level driver to set an upper limit on the size of a | ||
649 | * coalesced segment | ||
650 | **/ | ||
651 | void blk_queue_max_segment_size(struct request_queue *q, unsigned int max_size) | ||
652 | { | ||
653 | if (max_size < PAGE_CACHE_SIZE) { | ||
654 | max_size = PAGE_CACHE_SIZE; | ||
655 | printk("%s: set to minimum %d\n", __FUNCTION__, max_size); | ||
656 | } | ||
657 | |||
658 | q->max_segment_size = max_size; | ||
659 | } | ||
660 | |||
661 | EXPORT_SYMBOL(blk_queue_max_segment_size); | ||
662 | |||
663 | /** | ||
664 | * blk_queue_hardsect_size - set hardware sector size for the queue | ||
665 | * @q: the request queue for the device | ||
666 | * @size: the hardware sector size, in bytes | ||
667 | * | ||
668 | * Description: | ||
669 | * This should typically be set to the lowest possible sector size | ||
670 | * that the hardware can operate on (possible without reverting to | ||
671 | * even internal read-modify-write operations). Usually the default | ||
672 | * of 512 covers most hardware. | ||
673 | **/ | ||
674 | void blk_queue_hardsect_size(struct request_queue *q, unsigned short size) | ||
675 | { | ||
676 | q->hardsect_size = size; | ||
677 | } | ||
678 | |||
679 | EXPORT_SYMBOL(blk_queue_hardsect_size); | ||
680 | |||
681 | /* | ||
682 | * Returns the minimum that is _not_ zero, unless both are zero. | ||
683 | */ | ||
684 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | ||
685 | |||
686 | /** | ||
687 | * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers | ||
688 | * @t: the stacking driver (top) | ||
689 | * @b: the underlying device (bottom) | ||
690 | **/ | ||
691 | void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) | ||
692 | { | ||
693 | /* zero is "infinity" */ | ||
694 | t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors); | ||
695 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors); | ||
696 | |||
697 | t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments); | ||
698 | t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments); | ||
699 | t->max_segment_size = min(t->max_segment_size,b->max_segment_size); | ||
700 | t->hardsect_size = max(t->hardsect_size,b->hardsect_size); | ||
701 | if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) | ||
702 | clear_bit(QUEUE_FLAG_CLUSTER, &t->queue_flags); | ||
703 | } | ||
704 | |||
705 | EXPORT_SYMBOL(blk_queue_stack_limits); | ||
706 | |||
707 | /** | ||
708 | * blk_queue_dma_drain - Set up a drain buffer for excess dma. | ||
709 | * | ||
710 | * @q: the request queue for the device | ||
711 | * @buf: physically contiguous buffer | ||
712 | * @size: size of the buffer in bytes | ||
713 | * | ||
714 | * Some devices have excess DMA problems and can't simply discard (or | ||
715 | * zero fill) the unwanted piece of the transfer. They have to have a | ||
716 | * real area of memory to transfer it into. The use case for this is | ||
717 | * ATAPI devices in DMA mode. If the packet command causes a transfer | ||
718 | * bigger than the transfer size some HBAs will lock up if there | ||
719 | * aren't DMA elements to contain the excess transfer. What this API | ||
720 | * does is adjust the queue so that the buf is always appended | ||
721 | * silently to the scatterlist. | ||
722 | * | ||
723 | * Note: This routine adjusts max_hw_segments to make room for | ||
724 | * appending the drain buffer. If you call | ||
725 | * blk_queue_max_hw_segments() or blk_queue_max_phys_segments() after | ||
726 | * calling this routine, you must set the limit to one fewer than your | ||
727 | * device can support otherwise there won't be room for the drain | ||
728 | * buffer. | ||
729 | */ | ||
730 | int blk_queue_dma_drain(struct request_queue *q, void *buf, | ||
731 | unsigned int size) | ||
732 | { | ||
733 | if (q->max_hw_segments < 2 || q->max_phys_segments < 2) | ||
734 | return -EINVAL; | ||
735 | /* make room for appending the drain */ | ||
736 | --q->max_hw_segments; | ||
737 | --q->max_phys_segments; | ||
738 | q->dma_drain_buffer = buf; | ||
739 | q->dma_drain_size = size; | ||
740 | |||
741 | return 0; | ||
742 | } | ||
743 | |||
744 | EXPORT_SYMBOL_GPL(blk_queue_dma_drain); | ||
745 | |||
746 | /** | ||
747 | * blk_queue_segment_boundary - set boundary rules for segment merging | ||
748 | * @q: the request queue for the device | ||
749 | * @mask: the memory boundary mask | ||
750 | **/ | ||
751 | void blk_queue_segment_boundary(struct request_queue *q, unsigned long mask) | ||
752 | { | ||
753 | if (mask < PAGE_CACHE_SIZE - 1) { | ||
754 | mask = PAGE_CACHE_SIZE - 1; | ||
755 | printk("%s: set to minimum %lx\n", __FUNCTION__, mask); | ||
756 | } | ||
757 | |||
758 | q->seg_boundary_mask = mask; | ||
759 | } | ||
760 | |||
761 | EXPORT_SYMBOL(blk_queue_segment_boundary); | ||
762 | |||
763 | /** | ||
764 | * blk_queue_dma_alignment - set dma length and memory alignment | ||
765 | * @q: the request queue for the device | ||
766 | * @mask: alignment mask | ||
767 | * | ||
768 | * description: | ||
769 | * set required memory and length alignment for direct dma transactions. | |
770 | * this is used when building direct io requests for the queue. | |
771 | * | ||
772 | **/ | ||
773 | void blk_queue_dma_alignment(struct request_queue *q, int mask) | ||
774 | { | ||
775 | q->dma_alignment = mask; | ||
776 | } | ||
777 | |||
778 | EXPORT_SYMBOL(blk_queue_dma_alignment); | ||
779 | |||
780 | /** | ||
781 | * blk_queue_update_dma_alignment - update dma length and memory alignment | ||
782 | * @q: the request queue for the device | ||
783 | * @mask: alignment mask | ||
784 | * | ||
785 | * description: | ||
786 | * update required memory and length alignment for direct dma transactions. | |
787 | * If the requested alignment is larger than the current alignment, then | ||
788 | * the current queue alignment is updated to the new value, otherwise it | ||
789 | * is left alone. The design of this is to allow multiple objects | ||
790 | * (driver, device, transport etc) to set their respective | ||
791 | * alignments without having them interfere. | ||
792 | * | ||
793 | **/ | ||
794 | void blk_queue_update_dma_alignment(struct request_queue *q, int mask) | ||
795 | { | ||
796 | BUG_ON(mask > PAGE_SIZE); | ||
797 | |||
798 | if (mask > q->dma_alignment) | ||
799 | q->dma_alignment = mask; | ||
800 | } | ||
801 | |||
802 | EXPORT_SYMBOL(blk_queue_update_dma_alignment); | ||
803 | |||
804 | void blk_dump_rq_flags(struct request *rq, char *msg) | 148 | void blk_dump_rq_flags(struct request *rq, char *msg) |
805 | { | 149 | { |
806 | int bit; | 150 | int bit; |
@@ -1074,8 +418,8 @@ static inline int ll_new_hw_segment(struct request_queue *q, | |||
1074 | return 1; | 418 | return 1; |
1075 | } | 419 | } |
1076 | 420 | ||
1077 | static int ll_back_merge_fn(struct request_queue *q, struct request *req, | 421 | int ll_back_merge_fn(struct request_queue *q, struct request *req, |
1078 | struct bio *bio) | 422 | struct bio *bio) |
1079 | { | 423 | { |
1080 | unsigned short max_sectors; | 424 | unsigned short max_sectors; |
1081 | int len; | 425 | int len; |
@@ -1285,7 +629,7 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | |||
1285 | blk_unplug(q); | 629 | blk_unplug(q); |
1286 | } | 630 | } |
1287 | 631 | ||
1288 | static void blk_unplug_work(struct work_struct *work) | 632 | void blk_unplug_work(struct work_struct *work) |
1289 | { | 633 | { |
1290 | struct request_queue *q = | 634 | struct request_queue *q = |
1291 | container_of(work, struct request_queue, unplug_work); | 635 | container_of(work, struct request_queue, unplug_work); |
@@ -1296,7 +640,7 @@ static void blk_unplug_work(struct work_struct *work) | |||
1296 | q->unplug_fn(q); | 640 | q->unplug_fn(q); |
1297 | } | 641 | } |
1298 | 642 | ||
1299 | static void blk_unplug_timeout(unsigned long data) | 643 | void blk_unplug_timeout(unsigned long data) |
1300 | { | 644 | { |
1301 | struct request_queue *q = (struct request_queue *)data; | 645 | struct request_queue *q = (struct request_queue *)data; |
1302 | 646 | ||
@@ -1961,393 +1305,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, | |||
1961 | 1305 | ||
1962 | EXPORT_SYMBOL(blk_insert_request); | 1306 | EXPORT_SYMBOL(blk_insert_request); |
1963 | 1307 | ||
1964 | static int __blk_rq_unmap_user(struct bio *bio) | ||
1965 | { | ||
1966 | int ret = 0; | ||
1967 | |||
1968 | if (bio) { | ||
1969 | if (bio_flagged(bio, BIO_USER_MAPPED)) | ||
1970 | bio_unmap_user(bio); | ||
1971 | else | ||
1972 | ret = bio_uncopy_user(bio); | ||
1973 | } | ||
1974 | |||
1975 | return ret; | ||
1976 | } | ||
1977 | |||
1978 | int blk_rq_append_bio(struct request_queue *q, struct request *rq, | ||
1979 | struct bio *bio) | ||
1980 | { | ||
1981 | if (!rq->bio) | ||
1982 | blk_rq_bio_prep(q, rq, bio); | ||
1983 | else if (!ll_back_merge_fn(q, rq, bio)) | ||
1984 | return -EINVAL; | ||
1985 | else { | ||
1986 | rq->biotail->bi_next = bio; | ||
1987 | rq->biotail = bio; | ||
1988 | |||
1989 | rq->data_len += bio->bi_size; | ||
1990 | } | ||
1991 | return 0; | ||
1992 | } | ||
1993 | EXPORT_SYMBOL(blk_rq_append_bio); | ||
1994 | |||
1995 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | ||
1996 | void __user *ubuf, unsigned int len) | ||
1997 | { | ||
1998 | unsigned long uaddr; | ||
1999 | struct bio *bio, *orig_bio; | ||
2000 | int reading, ret; | ||
2001 | |||
2002 | reading = rq_data_dir(rq) == READ; | ||
2003 | |||
2004 | /* | ||
2005 | * if alignment requirement is satisfied, map in user pages for | ||
2006 | * direct dma. else, set up kernel bounce buffers | ||
2007 | */ | ||
2008 | uaddr = (unsigned long) ubuf; | ||
2009 | if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) | ||
2010 | bio = bio_map_user(q, NULL, uaddr, len, reading); | ||
2011 | else | ||
2012 | bio = bio_copy_user(q, uaddr, len, reading); | ||
2013 | |||
2014 | if (IS_ERR(bio)) | ||
2015 | return PTR_ERR(bio); | ||
2016 | |||
2017 | orig_bio = bio; | ||
2018 | blk_queue_bounce(q, &bio); | ||
2019 | |||
2020 | /* | ||
2021 | * We link the bounce buffer in and could have to traverse it | ||
2022 | * later so we have to get a ref to prevent it from being freed | ||
2023 | */ | ||
2024 | bio_get(bio); | ||
2025 | |||
2026 | ret = blk_rq_append_bio(q, rq, bio); | ||
2027 | if (!ret) | ||
2028 | return bio->bi_size; | ||
2029 | |||
2030 | /* if it was boucned we must call the end io function */ | ||
2031 | bio_endio(bio, 0); | ||
2032 | __blk_rq_unmap_user(orig_bio); | ||
2033 | bio_put(bio); | ||
2034 | return ret; | ||
2035 | } | ||
2036 | |||
2037 | /** | ||
2038 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | ||
2039 | * @q: request queue where request should be inserted | ||
2040 | * @rq: request structure to fill | ||
2041 | * @ubuf: the user buffer | ||
2042 | * @len: length of user data | ||
2043 | * | ||
2044 | * Description: | ||
2045 | * Data will be mapped directly for zero copy io, if possible. Otherwise | ||
2046 | * a kernel bounce buffer is used. | ||
2047 | * | ||
2048 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | ||
2049 | * still in process context. | ||
2050 | * | ||
2051 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | ||
2052 | * before being submitted to the device, as pages mapped may be out of | ||
2053 | * reach. It's the callers responsibility to make sure this happens. The | ||
2054 | * original bio must be passed back in to blk_rq_unmap_user() for proper | ||
2055 | * unmapping. | ||
2056 | */ | ||
2057 | int blk_rq_map_user(struct request_queue *q, struct request *rq, | ||
2058 | void __user *ubuf, unsigned long len) | ||
2059 | { | ||
2060 | unsigned long bytes_read = 0; | ||
2061 | struct bio *bio = NULL; | ||
2062 | int ret; | ||
2063 | |||
2064 | if (len > (q->max_hw_sectors << 9)) | ||
2065 | return -EINVAL; | ||
2066 | if (!len || !ubuf) | ||
2067 | return -EINVAL; | ||
2068 | |||
2069 | while (bytes_read != len) { | ||
2070 | unsigned long map_len, end, start; | ||
2071 | |||
2072 | map_len = min_t(unsigned long, len - bytes_read, BIO_MAX_SIZE); | ||
2073 | end = ((unsigned long)ubuf + map_len + PAGE_SIZE - 1) | ||
2074 | >> PAGE_SHIFT; | ||
2075 | start = (unsigned long)ubuf >> PAGE_SHIFT; | ||
2076 | |||
2077 | /* | ||
2078 | * A bad offset could cause us to require BIO_MAX_PAGES + 1 | ||
2079 | * pages. If this happens we just lower the requested | ||
2080 | * mapping len by a page so that we can fit | ||
2081 | */ | ||
2082 | if (end - start > BIO_MAX_PAGES) | ||
2083 | map_len -= PAGE_SIZE; | ||
2084 | |||
2085 | ret = __blk_rq_map_user(q, rq, ubuf, map_len); | ||
2086 | if (ret < 0) | ||
2087 | goto unmap_rq; | ||
2088 | if (!bio) | ||
2089 | bio = rq->bio; | ||
2090 | bytes_read += ret; | ||
2091 | ubuf += ret; | ||
2092 | } | ||
2093 | |||
2094 | rq->buffer = rq->data = NULL; | ||
2095 | return 0; | ||
2096 | unmap_rq: | ||
2097 | blk_rq_unmap_user(bio); | ||
2098 | return ret; | ||
2099 | } | ||
2100 | |||
2101 | EXPORT_SYMBOL(blk_rq_map_user); | ||
2102 | |||
2103 | /** | ||
2104 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | ||
2105 | * @q: request queue where request should be inserted | ||
2106 | * @rq: request to map data to | ||
2107 | * @iov: pointer to the iovec | ||
2108 | * @iov_count: number of elements in the iovec | ||
2109 | * @len: I/O byte count | ||
2110 | * | ||
2111 | * Description: | ||
2112 | * Data will be mapped directly for zero copy io, if possible. Otherwise | ||
2113 | * a kernel bounce buffer is used. | ||
2114 | * | ||
2115 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | ||
2116 | * still in process context. | ||
2117 | * | ||
2118 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | ||
2119 | * before being submitted to the device, as pages mapped may be out of | ||
2120 | * reach. It's the callers responsibility to make sure this happens. The | ||
2121 | * original bio must be passed back in to blk_rq_unmap_user() for proper | ||
2122 | * unmapping. | ||
2123 | */ | ||
2124 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | ||
2125 | struct sg_iovec *iov, int iov_count, unsigned int len) | ||
2126 | { | ||
2127 | struct bio *bio; | ||
2128 | |||
2129 | if (!iov || iov_count <= 0) | ||
2130 | return -EINVAL; | ||
2131 | |||
2132 | /* we don't allow misaligned data like bio_map_user() does. If the | ||
2133 | * user is using sg, they're expected to know the alignment constraints | ||
2134 | * and respect them accordingly */ | ||
2135 | bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); | ||
2136 | if (IS_ERR(bio)) | ||
2137 | return PTR_ERR(bio); | ||
2138 | |||
2139 | if (bio->bi_size != len) { | ||
2140 | bio_endio(bio, 0); | ||
2141 | bio_unmap_user(bio); | ||
2142 | return -EINVAL; | ||
2143 | } | ||
2144 | |||
2145 | bio_get(bio); | ||
2146 | blk_rq_bio_prep(q, rq, bio); | ||
2147 | rq->buffer = rq->data = NULL; | ||
2148 | return 0; | ||
2149 | } | ||
2150 | |||
2151 | EXPORT_SYMBOL(blk_rq_map_user_iov); | ||
2152 | |||
2153 | /** | ||
2154 | * blk_rq_unmap_user - unmap a request with user data | ||
2155 | * @bio: start of bio list | ||
2156 | * | ||
2157 | * Description: | ||
2158 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must | ||
2159 | * supply the original rq->bio from the blk_rq_map_user() return, since | ||
2160 | * the io completion may have changed rq->bio. | ||
2161 | */ | ||
2162 | int blk_rq_unmap_user(struct bio *bio) | ||
2163 | { | ||
2164 | struct bio *mapped_bio; | ||
2165 | int ret = 0, ret2; | ||
2166 | |||
2167 | while (bio) { | ||
2168 | mapped_bio = bio; | ||
2169 | if (unlikely(bio_flagged(bio, BIO_BOUNCED))) | ||
2170 | mapped_bio = bio->bi_private; | ||
2171 | |||
2172 | ret2 = __blk_rq_unmap_user(mapped_bio); | ||
2173 | if (ret2 && !ret) | ||
2174 | ret = ret2; | ||
2175 | |||
2176 | mapped_bio = bio; | ||
2177 | bio = bio->bi_next; | ||
2178 | bio_put(mapped_bio); | ||
2179 | } | ||
2180 | |||
2181 | return ret; | ||
2182 | } | ||
2183 | |||
2184 | EXPORT_SYMBOL(blk_rq_unmap_user); | ||
2185 | |||
2186 | /** | ||
2187 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | ||
2188 | * @q: request queue where request should be inserted | ||
2189 | * @rq: request to fill | ||
2190 | * @kbuf: the kernel buffer | ||
2191 | * @len: length of user data | ||
2192 | * @gfp_mask: memory allocation flags | ||
2193 | */ | ||
2194 | int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, | ||
2195 | unsigned int len, gfp_t gfp_mask) | ||
2196 | { | ||
2197 | struct bio *bio; | ||
2198 | |||
2199 | if (len > (q->max_hw_sectors << 9)) | ||
2200 | return -EINVAL; | ||
2201 | if (!len || !kbuf) | ||
2202 | return -EINVAL; | ||
2203 | |||
2204 | bio = bio_map_kern(q, kbuf, len, gfp_mask); | ||
2205 | if (IS_ERR(bio)) | ||
2206 | return PTR_ERR(bio); | ||
2207 | |||
2208 | if (rq_data_dir(rq) == WRITE) | ||
2209 | bio->bi_rw |= (1 << BIO_RW); | ||
2210 | |||
2211 | blk_rq_bio_prep(q, rq, bio); | ||
2212 | blk_queue_bounce(q, &rq->bio); | ||
2213 | rq->buffer = rq->data = NULL; | ||
2214 | return 0; | ||
2215 | } | ||
2216 | |||
2217 | EXPORT_SYMBOL(blk_rq_map_kern); | ||
2218 | |||
2219 | /** | ||
2220 | * blk_execute_rq_nowait - insert a request into queue for execution | ||
2221 | * @q: queue to insert the request in | ||
2222 | * @bd_disk: matching gendisk | ||
2223 | * @rq: request to insert | ||
2224 | * @at_head: insert request at head or tail of queue | ||
2225 | * @done: I/O completion handler | ||
2226 | * | ||
2227 | * Description: | ||
2228 | * Insert a fully prepared request at the back of the io scheduler queue | ||
2229 | * for execution. Don't wait for completion. | ||
2230 | */ | ||
2231 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | ||
2232 | struct request *rq, int at_head, | ||
2233 | rq_end_io_fn *done) | ||
2234 | { | ||
2235 | int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; | ||
2236 | |||
2237 | rq->rq_disk = bd_disk; | ||
2238 | rq->cmd_flags |= REQ_NOMERGE; | ||
2239 | rq->end_io = done; | ||
2240 | WARN_ON(irqs_disabled()); | ||
2241 | spin_lock_irq(q->queue_lock); | ||
2242 | __elv_add_request(q, rq, where, 1); | ||
2243 | __generic_unplug_device(q); | ||
2244 | spin_unlock_irq(q->queue_lock); | ||
2245 | } | ||
2246 | EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | ||
2247 | |||
2248 | /** | ||
2249 | * blk_execute_rq - insert a request into queue for execution | ||
2250 | * @q: queue to insert the request in | ||
2251 | * @bd_disk: matching gendisk | ||
2252 | * @rq: request to insert | ||
2253 | * @at_head: insert request at head or tail of queue | ||
2254 | * | ||
2255 | * Description: | ||
2256 | * Insert a fully prepared request at the back of the io scheduler queue | ||
2257 | * for execution and wait for completion. | ||
2258 | */ | ||
2259 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, | ||
2260 | struct request *rq, int at_head) | ||
2261 | { | ||
2262 | DECLARE_COMPLETION_ONSTACK(wait); | ||
2263 | char sense[SCSI_SENSE_BUFFERSIZE]; | ||
2264 | int err = 0; | ||
2265 | |||
2266 | /* | ||
2267 | * we need an extra reference to the request, so we can look at | ||
2268 | * it after io completion | ||
2269 | */ | ||
2270 | rq->ref_count++; | ||
2271 | |||
2272 | if (!rq->sense) { | ||
2273 | memset(sense, 0, sizeof(sense)); | ||
2274 | rq->sense = sense; | ||
2275 | rq->sense_len = 0; | ||
2276 | } | ||
2277 | |||
2278 | rq->end_io_data = &wait; | ||
2279 | blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); | ||
2280 | wait_for_completion(&wait); | ||
2281 | |||
2282 | if (rq->errors) | ||
2283 | err = -EIO; | ||
2284 | |||
2285 | return err; | ||
2286 | } | ||
2287 | |||
2288 | EXPORT_SYMBOL(blk_execute_rq); | ||
2289 | |||
2290 | static void bio_end_empty_barrier(struct bio *bio, int err) | ||
2291 | { | ||
2292 | if (err) | ||
2293 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
2294 | |||
2295 | complete(bio->bi_private); | ||
2296 | } | ||
2297 | |||
2298 | /** | ||
2299 | * blkdev_issue_flush - queue a flush | ||
2300 | * @bdev: blockdev to issue flush for | ||
2301 | * @error_sector: error sector | ||
2302 | * | ||
2303 | * Description: | ||
2304 | * Issue a flush for the block device in question. Caller can supply | ||
2305 | * room for storing the error offset in case of a flush error, if they | ||
2306 | * wish to. Caller must run wait_for_completion() on its own. | ||
2307 | */ | ||
2308 | int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | ||
2309 | { | ||
2310 | DECLARE_COMPLETION_ONSTACK(wait); | ||
2311 | struct request_queue *q; | ||
2312 | struct bio *bio; | ||
2313 | int ret; | ||
2314 | |||
2315 | if (bdev->bd_disk == NULL) | ||
2316 | return -ENXIO; | ||
2317 | |||
2318 | q = bdev_get_queue(bdev); | ||
2319 | if (!q) | ||
2320 | return -ENXIO; | ||
2321 | |||
2322 | bio = bio_alloc(GFP_KERNEL, 0); | ||
2323 | if (!bio) | ||
2324 | return -ENOMEM; | ||
2325 | |||
2326 | bio->bi_end_io = bio_end_empty_barrier; | ||
2327 | bio->bi_private = &wait; | ||
2328 | bio->bi_bdev = bdev; | ||
2329 | submit_bio(1 << BIO_RW_BARRIER, bio); | ||
2330 | |||
2331 | wait_for_completion(&wait); | ||
2332 | |||
2333 | /* | ||
2334 | * The driver must store the error location in ->bi_sector, if | ||
2335 | * it supports it. For non-stacked drivers, this should be copied | ||
2336 | * from rq->sector. | ||
2337 | */ | ||
2338 | if (error_sector) | ||
2339 | *error_sector = bio->bi_sector; | ||
2340 | |||
2341 | ret = 0; | ||
2342 | if (!bio_flagged(bio, BIO_UPTODATE)) | ||
2343 | ret = -EIO; | ||
2344 | |||
2345 | bio_put(bio); | ||
2346 | return ret; | ||
2347 | } | ||
2348 | |||
2349 | EXPORT_SYMBOL(blkdev_issue_flush); | ||
2350 | |||
2351 | static void drive_stat_acct(struct request *rq, int new_io) | 1308 | static void drive_stat_acct(struct request *rq, int new_io) |
2352 | { | 1309 | { |
2353 | int rw = rq_data_dir(rq); | 1310 | int rw = rq_data_dir(rq); |
@@ -2459,26 +1416,6 @@ void blk_put_request(struct request *req) | |||
2459 | 1416 | ||
2460 | EXPORT_SYMBOL(blk_put_request); | 1417 | EXPORT_SYMBOL(blk_put_request); |
2461 | 1418 | ||
2462 | /** | ||
2463 | * blk_end_sync_rq - executes a completion event on a request | ||
2464 | * @rq: request to complete | ||
2465 | * @error: end io status of the request | ||
2466 | */ | ||
2467 | void blk_end_sync_rq(struct request *rq, int error) | ||
2468 | { | ||
2469 | struct completion *waiting = rq->end_io_data; | ||
2470 | |||
2471 | rq->end_io_data = NULL; | ||
2472 | __blk_put_request(rq->q, rq); | ||
2473 | |||
2474 | /* | ||
2475 | * complete last, if this is a stack request the process (and thus | ||
2476 | * the rq pointer) could be invalid right after this complete() | ||
2477 | */ | ||
2478 | complete(waiting); | ||
2479 | } | ||
2480 | EXPORT_SYMBOL(blk_end_sync_rq); | ||
2481 | |||
2482 | /* | 1419 | /* |
2483 | * Has to be called with the request spinlock acquired | 1420 | * Has to be called with the request spinlock acquired |
2484 | */ | 1421 | */ |
@@ -2557,7 +1494,7 @@ static inline int attempt_front_merge(struct request_queue *q, | |||
2557 | return 0; | 1494 | return 0; |
2558 | } | 1495 | } |
2559 | 1496 | ||
2560 | static void init_request_from_bio(struct request *req, struct bio *bio) | 1497 | void init_request_from_bio(struct request *req, struct bio *bio) |
2561 | { | 1498 | { |
2562 | req->cmd_type = REQ_TYPE_FS; | 1499 | req->cmd_type = REQ_TYPE_FS; |
2563 | 1500 | ||
@@ -3524,8 +2461,8 @@ int blk_end_request_callback(struct request *rq, int error, int nr_bytes, | |||
3524 | } | 2461 | } |
3525 | EXPORT_SYMBOL_GPL(blk_end_request_callback); | 2462 | EXPORT_SYMBOL_GPL(blk_end_request_callback); |
3526 | 2463 | ||
3527 | static void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 2464 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
3528 | struct bio *bio) | 2465 | struct bio *bio) |
3529 | { | 2466 | { |
3530 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ | 2467 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ |
3531 | rq->cmd_flags |= (bio->bi_rw & 3); | 2468 | rq->cmd_flags |= (bio->bi_rw & 3); |
@@ -3571,188 +2508,12 @@ int __init blk_dev_init(void) | |||
3571 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | 2508 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
3572 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 2509 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
3573 | 2510 | ||
3574 | iocontext_cachep = kmem_cache_create("blkdev_ioc", | ||
3575 | sizeof(struct io_context), 0, SLAB_PANIC, NULL); | ||
3576 | |||
3577 | for_each_possible_cpu(i) | 2511 | for_each_possible_cpu(i) |
3578 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | 2512 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); |
3579 | 2513 | ||
3580 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); | 2514 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); |
3581 | register_hotcpu_notifier(&blk_cpu_notifier); | 2515 | register_hotcpu_notifier(&blk_cpu_notifier); |
3582 | 2516 | ||
3583 | blk_max_low_pfn = max_low_pfn - 1; | ||
3584 | blk_max_pfn = max_pfn - 1; | ||
3585 | |||
3586 | return 0; | ||
3587 | } | ||
3588 | |||
3589 | static void cfq_dtor(struct io_context *ioc) | ||
3590 | { | ||
3591 | struct cfq_io_context *cic[1]; | ||
3592 | int r; | ||
3593 | |||
3594 | /* | ||
3595 | * We don't have a specific key to lookup with, so use the gang | ||
3596 | * lookup to just retrieve the first item stored. The cfq exit | ||
3597 | * function will iterate the full tree, so any member will do. | ||
3598 | */ | ||
3599 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
3600 | if (r > 0) | ||
3601 | cic[0]->dtor(ioc); | ||
3602 | } | ||
3603 | |||
3604 | /* | ||
3605 | * IO Context helper functions. put_io_context() returns 1 if there are no | ||
3606 | * more users of this io context, 0 otherwise. | ||
3607 | */ | ||
3608 | int put_io_context(struct io_context *ioc) | ||
3609 | { | ||
3610 | if (ioc == NULL) | ||
3611 | return 1; | ||
3612 | |||
3613 | BUG_ON(atomic_read(&ioc->refcount) == 0); | ||
3614 | |||
3615 | if (atomic_dec_and_test(&ioc->refcount)) { | ||
3616 | rcu_read_lock(); | ||
3617 | if (ioc->aic && ioc->aic->dtor) | ||
3618 | ioc->aic->dtor(ioc->aic); | ||
3619 | rcu_read_unlock(); | ||
3620 | cfq_dtor(ioc); | ||
3621 | |||
3622 | kmem_cache_free(iocontext_cachep, ioc); | ||
3623 | return 1; | ||
3624 | } | ||
3625 | return 0; | 2517 | return 0; |
3626 | } | 2518 | } |
3627 | EXPORT_SYMBOL(put_io_context); | ||
3628 | |||
3629 | static void cfq_exit(struct io_context *ioc) | ||
3630 | { | ||
3631 | struct cfq_io_context *cic[1]; | ||
3632 | int r; | ||
3633 | |||
3634 | rcu_read_lock(); | ||
3635 | /* | ||
3636 | * See comment for cfq_dtor() | ||
3637 | */ | ||
3638 | r = radix_tree_gang_lookup(&ioc->radix_root, (void **) cic, 0, 1); | ||
3639 | rcu_read_unlock(); | ||
3640 | |||
3641 | if (r > 0) | ||
3642 | cic[0]->exit(ioc); | ||
3643 | } | ||
3644 | |||
3645 | /* Called by the exitting task */ | ||
3646 | void exit_io_context(void) | ||
3647 | { | ||
3648 | struct io_context *ioc; | ||
3649 | |||
3650 | task_lock(current); | ||
3651 | ioc = current->io_context; | ||
3652 | current->io_context = NULL; | ||
3653 | task_unlock(current); | ||
3654 | |||
3655 | if (atomic_dec_and_test(&ioc->nr_tasks)) { | ||
3656 | if (ioc->aic && ioc->aic->exit) | ||
3657 | ioc->aic->exit(ioc->aic); | ||
3658 | cfq_exit(ioc); | ||
3659 | |||
3660 | put_io_context(ioc); | ||
3661 | } | ||
3662 | } | ||
3663 | |||
3664 | struct io_context *alloc_io_context(gfp_t gfp_flags, int node) | ||
3665 | { | ||
3666 | struct io_context *ret; | ||
3667 | |||
3668 | ret = kmem_cache_alloc_node(iocontext_cachep, gfp_flags, node); | ||
3669 | if (ret) { | ||
3670 | atomic_set(&ret->refcount, 1); | ||
3671 | atomic_set(&ret->nr_tasks, 1); | ||
3672 | spin_lock_init(&ret->lock); | ||
3673 | ret->ioprio_changed = 0; | ||
3674 | ret->ioprio = 0; | ||
3675 | ret->last_waited = jiffies; /* doesn't matter... */ | ||
3676 | ret->nr_batch_requests = 0; /* because this is 0 */ | ||
3677 | ret->aic = NULL; | ||
3678 | INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); | ||
3679 | ret->ioc_data = NULL; | ||
3680 | } | ||
3681 | |||
3682 | return ret; | ||
3683 | } | ||
3684 | |||
3685 | /* | ||
3686 | * If the current task has no IO context then create one and initialise it. | ||
3687 | * Otherwise, return its existing IO context. | ||
3688 | * | ||
3689 | * This returned IO context doesn't have a specifically elevated refcount, | ||
3690 | * but since the current task itself holds a reference, the context can be | ||
3691 | * used in general code, so long as it stays within `current` context. | ||
3692 | */ | ||
3693 | static struct io_context *current_io_context(gfp_t gfp_flags, int node) | ||
3694 | { | ||
3695 | struct task_struct *tsk = current; | ||
3696 | struct io_context *ret; | ||
3697 | |||
3698 | ret = tsk->io_context; | ||
3699 | if (likely(ret)) | ||
3700 | return ret; | ||
3701 | |||
3702 | ret = alloc_io_context(gfp_flags, node); | ||
3703 | if (ret) { | ||
3704 | /* make sure set_task_ioprio() sees the settings above */ | ||
3705 | smp_wmb(); | ||
3706 | tsk->io_context = ret; | ||
3707 | } | ||
3708 | |||
3709 | return ret; | ||
3710 | } | ||
3711 | |||
3712 | /* | ||
3713 | * If the current task has no IO context then create one and initialise it. | ||
3714 | * If it does have a context, take a ref on it. | ||
3715 | * | ||
3716 | * This is always called in the context of the task which submitted the I/O. | ||
3717 | */ | ||
3718 | struct io_context *get_io_context(gfp_t gfp_flags, int node) | ||
3719 | { | ||
3720 | struct io_context *ret = NULL; | ||
3721 | |||
3722 | /* | ||
3723 | * Check for unlikely race with exiting task. ioc ref count is | ||
3724 | * zero when ioc is being detached. | ||
3725 | */ | ||
3726 | do { | ||
3727 | ret = current_io_context(gfp_flags, node); | ||
3728 | if (unlikely(!ret)) | ||
3729 | break; | ||
3730 | } while (!atomic_inc_not_zero(&ret->refcount)); | ||
3731 | |||
3732 | return ret; | ||
3733 | } | ||
3734 | EXPORT_SYMBOL(get_io_context); | ||
3735 | |||
3736 | void copy_io_context(struct io_context **pdst, struct io_context **psrc) | ||
3737 | { | ||
3738 | struct io_context *src = *psrc; | ||
3739 | struct io_context *dst = *pdst; | ||
3740 | |||
3741 | if (src) { | ||
3742 | BUG_ON(atomic_read(&src->refcount) == 0); | ||
3743 | atomic_inc(&src->refcount); | ||
3744 | put_io_context(dst); | ||
3745 | *pdst = src; | ||
3746 | } | ||
3747 | } | ||
3748 | EXPORT_SYMBOL(copy_io_context); | ||
3749 | |||
3750 | void swap_io_context(struct io_context **ioc1, struct io_context **ioc2) | ||
3751 | { | ||
3752 | struct io_context *temp; | ||
3753 | temp = *ioc1; | ||
3754 | *ioc1 = *ioc2; | ||
3755 | *ioc2 = temp; | ||
3756 | } | ||
3757 | EXPORT_SYMBOL(swap_io_context); | ||
3758 | 2519 | ||