Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile | 4
-rw-r--r-- | block/as-iosched.c | 14
-rw-r--r-- | block/blk-barrier.c | 72
-rw-r--r-- | block/blk-core.c | 605
-rw-r--r-- | block/blk-exec.c | 6
-rw-r--r-- | block/blk-integrity.c | 33
-rw-r--r-- | block/blk-map.c | 68
-rw-r--r-- | block/blk-merge.c | 129
-rw-r--r-- | block/blk-settings.c | 43
-rw-r--r-- | block/blk-softirq.c | 175
-rw-r--r-- | block/blk-sysfs.c | 35
-rw-r--r-- | block/blk-tag.c | 22
-rw-r--r-- | block/blk-timeout.c | 238
-rw-r--r-- | block/blk.h | 48
-rw-r--r-- | block/blktrace.c | 32
-rw-r--r-- | block/bsg.c | 6
-rw-r--r-- | block/cfq-iosched.c | 57
-rw-r--r-- | block/cmd-filter.c | 9
-rw-r--r-- | block/compat_ioctl.c | 1
-rw-r--r-- | block/deadline-iosched.c | 40
-rw-r--r-- | block/elevator.c | 40
-rw-r--r-- | block/genhd.c | 965
-rw-r--r-- | block/ioctl.c | 124
-rw-r--r-- | block/scsi_ioctl.c | 8
24 files changed, 1894 insertions, 880 deletions
diff --git a/block/Makefile b/block/Makefile index 208000b0750d..bfe73049f939 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -4,8 +4,8 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | cmd-filter.o | 8 | ioctl.o genhd.o scsi_ioctl.o cmd-filter.o |
9 | 9 | ||
10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 10 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
11 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o | 11 | obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o |
diff --git a/block/as-iosched.c b/block/as-iosched.c index cf4eb0eefbbf..71f0abb219ee 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c | |||
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad) | |||
462 | del_timer(&ad->antic_timer); | 462 | del_timer(&ad->antic_timer); |
463 | ad->antic_status = ANTIC_FINISHED; | 463 | ad->antic_status = ANTIC_FINISHED; |
464 | /* see as_work_handler */ | 464 | /* see as_work_handler */ |
465 | kblockd_schedule_work(&ad->antic_work); | 465 | kblockd_schedule_work(ad->q, &ad->antic_work); |
466 | } | 466 | } |
467 | } | 467 | } |
468 | 468 | ||
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data) | |||
483 | aic = ad->io_context->aic; | 483 | aic = ad->io_context->aic; |
484 | 484 | ||
485 | ad->antic_status = ANTIC_FINISHED; | 485 | ad->antic_status = ANTIC_FINISHED; |
486 | kblockd_schedule_work(&ad->antic_work); | 486 | kblockd_schedule_work(q, &ad->antic_work); |
487 | 487 | ||
488 | if (aic->ttime_samples == 0) { | 488 | if (aic->ttime_samples == 0) { |
489 | /* process anticipated on has exited or timed out*/ | 489 | /* process anticipated on has exited or timed out*/ |
@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) | |||
745 | */ | 745 | */ |
746 | static int as_can_anticipate(struct as_data *ad, struct request *rq) | 746 | static int as_can_anticipate(struct as_data *ad, struct request *rq) |
747 | { | 747 | { |
748 | #if 0 /* disable for now, we need to check tag level as well */ | ||
749 | /* | ||
750 | * SSD device without seek penalty, disable idling | ||
751 | */ | ||
752 | if (blk_queue_nonrot(ad->q)) axman | ||
753 | return 0; | ||
754 | #endif | ||
755 | |||
748 | if (!ad->io_context) | 756 | if (!ad->io_context) |
749 | /* | 757 | /* |
750 | * Last request submitted was a write | 758 | * Last request submitted was a write |
@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) | |||
844 | if (ad->changed_batch && ad->nr_dispatched == 1) { | 852 | if (ad->changed_batch && ad->nr_dispatched == 1) { |
845 | ad->current_batch_expires = jiffies + | 853 | ad->current_batch_expires = jiffies + |
846 | ad->batch_expire[ad->batch_data_dir]; | 854 | ad->batch_expire[ad->batch_data_dir]; |
847 | kblockd_schedule_work(&ad->antic_work); | 855 | kblockd_schedule_work(q, &ad->antic_work); |
848 | ad->changed_batch = 0; | 856 | ad->changed_batch = 0; |
849 | 857 | ||
850 | if (ad->batch_data_dir == REQ_SYNC) | 858 | if (ad->batch_data_dir == REQ_SYNC) |
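A recurring change in this series: kblockd_schedule_work() now takes the request_queue as its first argument so the deferred work can be run on a CPU appropriate for that queue. A minimal sketch of the new calling convention; my_sched_data, my_kick_fn and my_defer_kick are purely illustrative names, not part of the patch:

#include <linux/blkdev.h>
#include <linux/workqueue.h>

struct my_sched_data {
	struct request_queue *q;	/* queue this instance serves */
	struct work_struct kick_work;	/* set up elsewhere with INIT_WORK() */
};

static void my_kick_fn(struct work_struct *work)
{
	struct my_sched_data *d = container_of(work, struct my_sched_data, kick_work);

	blk_run_queue(d->q);		/* restart request dispatching */
}

static void my_defer_kick(struct my_sched_data *d)
{
	/* the queue now comes first, as in the as-iosched.c hunks above */
	kblockd_schedule_work(d->q, &d->kick_work);
}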
diff --git a/block/blk-barrier.c b/block/blk-barrier.c index a09ead19f9c5..5c99ff8d2db8 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c | |||
@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
293 | bio->bi_end_io = bio_end_empty_barrier; | 293 | bio->bi_end_io = bio_end_empty_barrier; |
294 | bio->bi_private = &wait; | 294 | bio->bi_private = &wait; |
295 | bio->bi_bdev = bdev; | 295 | bio->bi_bdev = bdev; |
296 | submit_bio(1 << BIO_RW_BARRIER, bio); | 296 | submit_bio(WRITE_BARRIER, bio); |
297 | 297 | ||
298 | wait_for_completion(&wait); | 298 | wait_for_completion(&wait); |
299 | 299 | ||
@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) | |||
315 | return ret; | 315 | return ret; |
316 | } | 316 | } |
317 | EXPORT_SYMBOL(blkdev_issue_flush); | 317 | EXPORT_SYMBOL(blkdev_issue_flush); |
318 | |||
319 | static void blkdev_discard_end_io(struct bio *bio, int err) | ||
320 | { | ||
321 | if (err) { | ||
322 | if (err == -EOPNOTSUPP) | ||
323 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
324 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
325 | } | ||
326 | |||
327 | bio_put(bio); | ||
328 | } | ||
329 | |||
330 | /** | ||
331 | * blkdev_issue_discard - queue a discard | ||
332 | * @bdev: blockdev to issue discard for | ||
333 | * @sector: start sector | ||
334 | * @nr_sects: number of sectors to discard | ||
335 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
336 | * | ||
337 | * Description: | ||
338 | * Issue a discard request for the sectors in question. Does not wait. | ||
339 | */ | ||
340 | int blkdev_issue_discard(struct block_device *bdev, | ||
341 | sector_t sector, sector_t nr_sects, gfp_t gfp_mask) | ||
342 | { | ||
343 | struct request_queue *q; | ||
344 | struct bio *bio; | ||
345 | int ret = 0; | ||
346 | |||
347 | if (bdev->bd_disk == NULL) | ||
348 | return -ENXIO; | ||
349 | |||
350 | q = bdev_get_queue(bdev); | ||
351 | if (!q) | ||
352 | return -ENXIO; | ||
353 | |||
354 | if (!q->prepare_discard_fn) | ||
355 | return -EOPNOTSUPP; | ||
356 | |||
357 | while (nr_sects && !ret) { | ||
358 | bio = bio_alloc(gfp_mask, 0); | ||
359 | if (!bio) | ||
360 | return -ENOMEM; | ||
361 | |||
362 | bio->bi_end_io = blkdev_discard_end_io; | ||
363 | bio->bi_bdev = bdev; | ||
364 | |||
365 | bio->bi_sector = sector; | ||
366 | |||
367 | if (nr_sects > q->max_hw_sectors) { | ||
368 | bio->bi_size = q->max_hw_sectors << 9; | ||
369 | nr_sects -= q->max_hw_sectors; | ||
370 | sector += q->max_hw_sectors; | ||
371 | } else { | ||
372 | bio->bi_size = nr_sects << 9; | ||
373 | nr_sects = 0; | ||
374 | } | ||
375 | bio_get(bio); | ||
376 | submit_bio(DISCARD_BARRIER, bio); | ||
377 | |||
378 | /* Check if it failed immediately */ | ||
379 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
380 | ret = -EOPNOTSUPP; | ||
381 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
382 | ret = -EIO; | ||
383 | bio_put(bio); | ||
384 | } | ||
385 | return ret; | ||
386 | } | ||
387 | EXPORT_SYMBOL(blkdev_issue_discard); | ||
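blkdev_issue_discard() mirrors blkdev_issue_flush(): zero-payload bios are submitted as DISCARD_BARRIER, and the call returns -EOPNOTSUPP when the queue has no prepare_discard_fn. A hedged sketch of a caller, e.g. a filesystem discarding a freed extent; the function name and error handling are illustrative:

#include <linux/blkdev.h>

static void my_discard_extent(struct block_device *bdev,
			      sector_t start, sector_t nr_sects)
{
	int err = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);

	/* -EOPNOTSUPP just means the device/driver cannot discard; ignore it */
	if (err && err != -EOPNOTSUPP)
		printk(KERN_WARNING "discard %llu+%llu failed: %d\n",
		       (unsigned long long)start,
		       (unsigned long long)nr_sects, err);
}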
diff --git a/block/blk-core.c b/block/blk-core.c index 2cba5ef97b2b..2d053b584410 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -26,8 +26,6 @@ | |||
26 | #include <linux/swap.h> | 26 | #include <linux/swap.h> |
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/interrupt.h> | ||
30 | #include <linux/cpu.h> | ||
31 | #include <linux/blktrace_api.h> | 29 | #include <linux/blktrace_api.h> |
32 | #include <linux/fault-inject.h> | 30 | #include <linux/fault-inject.h> |
33 | 31 | ||
@@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep; | |||
50 | */ | 48 | */ |
51 | static struct workqueue_struct *kblockd_workqueue; | 49 | static struct workqueue_struct *kblockd_workqueue; |
52 | 50 | ||
53 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | ||
54 | |||
55 | static void drive_stat_acct(struct request *rq, int new_io) | 51 | static void drive_stat_acct(struct request *rq, int new_io) |
56 | { | 52 | { |
57 | struct hd_struct *part; | 53 | struct hd_struct *part; |
58 | int rw = rq_data_dir(rq); | 54 | int rw = rq_data_dir(rq); |
55 | int cpu; | ||
59 | 56 | ||
60 | if (!blk_fs_request(rq) || !rq->rq_disk) | 57 | if (!blk_fs_request(rq) || !rq->rq_disk) |
61 | return; | 58 | return; |
62 | 59 | ||
63 | part = get_part(rq->rq_disk, rq->sector); | 60 | cpu = part_stat_lock(); |
61 | part = disk_map_sector_rcu(rq->rq_disk, rq->sector); | ||
62 | |||
64 | if (!new_io) | 63 | if (!new_io) |
65 | __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); | 64 | part_stat_inc(cpu, part, merges[rw]); |
66 | else { | 65 | else { |
67 | disk_round_stats(rq->rq_disk); | 66 | part_round_stats(cpu, part); |
68 | rq->rq_disk->in_flight++; | 67 | part_inc_in_flight(part); |
69 | if (part) { | ||
70 | part_round_stats(part); | ||
71 | part->in_flight++; | ||
72 | } | ||
73 | } | 68 | } |
69 | |||
70 | part_stat_unlock(); | ||
74 | } | 71 | } |
75 | 72 | ||
76 | void blk_queue_congestion_threshold(struct request_queue *q) | 73 | void blk_queue_congestion_threshold(struct request_queue *q) |
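The per-partition statistics now use a per-CPU scheme: part_stat_lock() pins the CPU, disk_map_sector_rcu() resolves the partition under RCU, and part_stat_unlock() drops both. A sketch of the same pattern outside drive_stat_acct(); the helper name is invented for illustration:

static void my_account_sectors(struct gendisk *disk, sector_t sector,
			       int rw, unsigned int nr)
{
	struct hd_struct *part;
	int cpu;

	cpu = part_stat_lock();				/* per-cpu, RCU read side */
	part = disk_map_sector_rcu(disk, sector);	/* sector -> partition */
	part_stat_add(cpu, part, sectors[rw], nr);	/* per-cpu counter update */
	part_stat_unlock();
}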
@@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |||
113 | memset(rq, 0, sizeof(*rq)); | 110 | memset(rq, 0, sizeof(*rq)); |
114 | 111 | ||
115 | INIT_LIST_HEAD(&rq->queuelist); | 112 | INIT_LIST_HEAD(&rq->queuelist); |
116 | INIT_LIST_HEAD(&rq->donelist); | 113 | INIT_LIST_HEAD(&rq->timeout_list); |
114 | rq->cpu = -1; | ||
117 | rq->q = q; | 115 | rq->q = q; |
118 | rq->sector = rq->hard_sector = (sector_t) -1; | 116 | rq->sector = rq->hard_sector = (sector_t) -1; |
119 | INIT_HLIST_NODE(&rq->hash); | 117 | INIT_HLIST_NODE(&rq->hash); |
@@ -308,7 +306,7 @@ void blk_unplug_timeout(unsigned long data) | |||
308 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, | 306 | blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, |
309 | q->rq.count[READ] + q->rq.count[WRITE]); | 307 | q->rq.count[READ] + q->rq.count[WRITE]); |
310 | 308 | ||
311 | kblockd_schedule_work(&q->unplug_work); | 309 | kblockd_schedule_work(q, &q->unplug_work); |
312 | } | 310 | } |
313 | 311 | ||
314 | void blk_unplug(struct request_queue *q) | 312 | void blk_unplug(struct request_queue *q) |
@@ -325,6 +323,21 @@ void blk_unplug(struct request_queue *q) | |||
325 | } | 323 | } |
326 | EXPORT_SYMBOL(blk_unplug); | 324 | EXPORT_SYMBOL(blk_unplug); |
327 | 325 | ||
326 | static void blk_invoke_request_fn(struct request_queue *q) | ||
327 | { | ||
328 | /* | ||
329 | * one level of recursion is ok and is much faster than kicking | ||
330 | * the unplug handling | ||
331 | */ | ||
332 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | ||
333 | q->request_fn(q); | ||
334 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | ||
335 | } else { | ||
336 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); | ||
337 | kblockd_schedule_work(q, &q->unplug_work); | ||
338 | } | ||
339 | } | ||
340 | |||
328 | /** | 341 | /** |
329 | * blk_start_queue - restart a previously stopped queue | 342 | * blk_start_queue - restart a previously stopped queue |
330 | * @q: The &struct request_queue in question | 343 | * @q: The &struct request_queue in question |
@@ -339,18 +352,7 @@ void blk_start_queue(struct request_queue *q) | |||
339 | WARN_ON(!irqs_disabled()); | 352 | WARN_ON(!irqs_disabled()); |
340 | 353 | ||
341 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 354 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
342 | 355 | blk_invoke_request_fn(q); | |
343 | /* | ||
344 | * one level of recursion is ok and is much faster than kicking | ||
345 | * the unplug handling | ||
346 | */ | ||
347 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | ||
348 | q->request_fn(q); | ||
349 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | ||
350 | } else { | ||
351 | blk_plug_device(q); | ||
352 | kblockd_schedule_work(&q->unplug_work); | ||
353 | } | ||
354 | } | 356 | } |
355 | EXPORT_SYMBOL(blk_start_queue); | 357 | EXPORT_SYMBOL(blk_start_queue); |
356 | 358 | ||
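blk_start_queue() (and, in the next hunk, __blk_run_queue()) now shares the recursion guard through blk_invoke_request_fn(); the driver-facing contract is unchanged: queue lock held, interrupts off. For context, a sketch of the usual stop/restart pairing in a driver that throttles itself; my_hw_busy() and the surrounding names are hypothetical:

/* ->request_fn runs with q->queue_lock held */
static void my_request_fn(struct request_queue *q)
{
	if (my_hw_busy()) {		/* hypothetical hardware-full check */
		blk_stop_queue(q);	/* stop dispatching until restarted */
		return;
	}
	/* ... dequeue and issue requests ... */
}

/* completion path: resources freed, let the queue run again */
static void my_restart_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_start_queue(q);	/* recurses into ->request_fn at most one level */
	spin_unlock_irqrestore(q->queue_lock, flags);
}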
@@ -408,15 +410,8 @@ void __blk_run_queue(struct request_queue *q) | |||
408 | * Only recurse once to avoid overrunning the stack, let the unplug | 410 | * Only recurse once to avoid overrunning the stack, let the unplug |
409 | * handling reinvoke the handler shortly if we already got there. | 411 | * handling reinvoke the handler shortly if we already got there. |
410 | */ | 412 | */ |
411 | if (!elv_queue_empty(q)) { | 413 | if (!elv_queue_empty(q)) |
412 | if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | 414 | blk_invoke_request_fn(q); |
413 | q->request_fn(q); | ||
414 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | ||
415 | } else { | ||
416 | blk_plug_device(q); | ||
417 | kblockd_schedule_work(&q->unplug_work); | ||
418 | } | ||
419 | } | ||
420 | } | 415 | } |
421 | EXPORT_SYMBOL(__blk_run_queue); | 416 | EXPORT_SYMBOL(__blk_run_queue); |
422 | 417 | ||
@@ -441,6 +436,14 @@ void blk_put_queue(struct request_queue *q) | |||
441 | 436 | ||
442 | void blk_cleanup_queue(struct request_queue *q) | 437 | void blk_cleanup_queue(struct request_queue *q) |
443 | { | 438 | { |
439 | /* | ||
440 | * We know we have process context here, so we can be a little | ||
441 | * cautious and ensure that pending block actions on this device | ||
442 | * are done before moving on. Going into this function, we should | ||
443 | * not have processes doing IO to this device. | ||
444 | */ | ||
445 | blk_sync_queue(q); | ||
446 | |||
444 | mutex_lock(&q->sysfs_lock); | 447 | mutex_lock(&q->sysfs_lock); |
445 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); | 448 | queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); |
446 | mutex_unlock(&q->sysfs_lock); | 449 | mutex_unlock(&q->sysfs_lock); |
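blk_cleanup_queue() now calls blk_sync_queue() itself, so the unplug timer and any pending kblockd work are flushed before the queue is marked dead. The expected teardown order in a driver stays the familiar one; struct my_dev and its fields are illustrative only:

static void my_device_remove(struct my_dev *dev)
{
	del_gendisk(dev->disk);		/* stop new I/O reaching the queue */
	blk_cleanup_queue(dev->queue);	/* now also syncs timers/kblockd work */
	put_disk(dev->disk);
}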
@@ -496,6 +499,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
496 | } | 499 | } |
497 | 500 | ||
498 | init_timer(&q->unplug_timer); | 501 | init_timer(&q->unplug_timer); |
502 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | ||
503 | INIT_LIST_HEAD(&q->timeout_list); | ||
499 | 504 | ||
500 | kobject_init(&q->kobj, &blk_queue_ktype); | 505 | kobject_init(&q->kobj, &blk_queue_ktype); |
501 | 506 | ||
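blk_alloc_queue_node() now arms a per-queue timeout timer and timeout_list; the handler side lives in the new blk-timeout.c. A hedged sketch of how a driver would hook into it, assuming the blk_queue_rq_timed_out() setter and blk_eh_timer_return values introduced elsewhere in this series; my_cmd_in_flight() is hypothetical:

static enum blk_eh_timer_return my_rq_timed_out(struct request *rq)
{
	if (my_cmd_in_flight(rq))		/* device still working on it? */
		return BLK_EH_RESET_TIMER;	/* give it another interval */
	return BLK_EH_NOT_HANDLED;		/* fall back to driver error handling */
}

static void my_init_queue(struct request_queue *q)
{
	blk_queue_rq_timed_out(q, my_rq_timed_out);	/* assumed setter from this series */
}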
@@ -531,7 +536,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node); | |||
531 | * request queue; this lock will be taken also from interrupt context, so irq | 536 | * request queue; this lock will be taken also from interrupt context, so irq |
532 | * disabling is needed for it. | 537 | * disabling is needed for it. |
533 | * | 538 | * |
534 | * Function returns a pointer to the initialized request queue, or NULL if | 539 | * Function returns a pointer to the initialized request queue, or %NULL if |
535 | * it didn't succeed. | 540 | * it didn't succeed. |
536 | * | 541 | * |
537 | * Note: | 542 | * Note: |
@@ -569,7 +574,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
569 | q->request_fn = rfn; | 574 | q->request_fn = rfn; |
570 | q->prep_rq_fn = NULL; | 575 | q->prep_rq_fn = NULL; |
571 | q->unplug_fn = generic_unplug_device; | 576 | q->unplug_fn = generic_unplug_device; |
572 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); | 577 | q->queue_flags = (1 << QUEUE_FLAG_CLUSTER | |
578 | 1 << QUEUE_FLAG_STACKABLE); | ||
573 | q->queue_lock = lock; | 579 | q->queue_lock = lock; |
574 | 580 | ||
575 | blk_queue_segment_boundary(q, 0xffffffff); | 581 | blk_queue_segment_boundary(q, 0xffffffff); |
@@ -624,10 +630,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) | |||
624 | 630 | ||
625 | blk_rq_init(q, rq); | 631 | blk_rq_init(q, rq); |
626 | 632 | ||
627 | /* | ||
628 | * first three bits are identical in rq->cmd_flags and bio->bi_rw, | ||
629 | * see bio.h and blkdev.h | ||
630 | */ | ||
631 | rq->cmd_flags = rw | REQ_ALLOCED; | 633 | rq->cmd_flags = rw | REQ_ALLOCED; |
632 | 634 | ||
633 | if (priv) { | 635 | if (priv) { |
@@ -888,9 +890,11 @@ EXPORT_SYMBOL(blk_get_request); | |||
888 | */ | 890 | */ |
889 | void blk_start_queueing(struct request_queue *q) | 891 | void blk_start_queueing(struct request_queue *q) |
890 | { | 892 | { |
891 | if (!blk_queue_plugged(q)) | 893 | if (!blk_queue_plugged(q)) { |
894 | if (unlikely(blk_queue_stopped(q))) | ||
895 | return; | ||
892 | q->request_fn(q); | 896 | q->request_fn(q); |
893 | else | 897 | } else |
894 | __generic_unplug_device(q); | 898 | __generic_unplug_device(q); |
895 | } | 899 | } |
896 | EXPORT_SYMBOL(blk_start_queueing); | 900 | EXPORT_SYMBOL(blk_start_queueing); |
@@ -907,6 +911,8 @@ EXPORT_SYMBOL(blk_start_queueing); | |||
907 | */ | 911 | */ |
908 | void blk_requeue_request(struct request_queue *q, struct request *rq) | 912 | void blk_requeue_request(struct request_queue *q, struct request *rq) |
909 | { | 913 | { |
914 | blk_delete_timer(rq); | ||
915 | blk_clear_rq_complete(rq); | ||
910 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); | 916 | blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); |
911 | 917 | ||
912 | if (blk_rq_tagged(rq)) | 918 | if (blk_rq_tagged(rq)) |
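blk_requeue_request() now also deletes the request's timeout timer and clears its completion flag before putting it back on the queue. Its use from a driver is unchanged, e.g. on a transient busy condition; the surrounding policy here is invented:

static void my_handle_device_busy(struct request_queue *q, struct request *rq)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);	/* timer deleted, complete flag cleared */
	blk_stop_queue(q);		/* back off; restarted from the completion path */
	spin_unlock_irqrestore(q->queue_lock, flags);
}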
@@ -917,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) | |||
917 | EXPORT_SYMBOL(blk_requeue_request); | 923 | EXPORT_SYMBOL(blk_requeue_request); |
918 | 924 | ||
919 | /** | 925 | /** |
920 | * blk_insert_request - insert a special request in to a request queue | 926 | * blk_insert_request - insert a special request into a request queue |
921 | * @q: request queue where request should be inserted | 927 | * @q: request queue where request should be inserted |
922 | * @rq: request to be inserted | 928 | * @rq: request to be inserted |
923 | * @at_head: insert request at head or tail of queue | 929 | * @at_head: insert request at head or tail of queue |
@@ -927,8 +933,8 @@ EXPORT_SYMBOL(blk_requeue_request); | |||
927 | * Many block devices need to execute commands asynchronously, so they don't | 933 | * Many block devices need to execute commands asynchronously, so they don't |
928 | * block the whole kernel from preemption during request execution. This is | 934 | * block the whole kernel from preemption during request execution. This is |
929 | * accomplished normally by inserting aritficial requests tagged as | 935 | * accomplished normally by inserting aritficial requests tagged as |
930 | * REQ_SPECIAL in to the corresponding request queue, and letting them be | 936 | * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them |
931 | * scheduled for actual execution by the request queue. | 937 | * be scheduled for actual execution by the request queue. |
932 | * | 938 | * |
933 | * We have the option of inserting the head or the tail of the queue. | 939 | * We have the option of inserting the head or the tail of the queue. |
934 | * Typically we use the tail for new ioctls and so forth. We use the head | 940 | * Typically we use the tail for new ioctls and so forth. We use the head |
@@ -982,8 +988,22 @@ static inline void add_request(struct request_queue *q, struct request *req) | |||
982 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); | 988 | __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); |
983 | } | 989 | } |
984 | 990 | ||
985 | /* | 991 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
986 | * disk_round_stats() - Round off the performance stats on a struct | 992 | unsigned long now) |
993 | { | ||
994 | if (now == part->stamp) | ||
995 | return; | ||
996 | |||
997 | if (part->in_flight) { | ||
998 | __part_stat_add(cpu, part, time_in_queue, | ||
999 | part->in_flight * (now - part->stamp)); | ||
1000 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | ||
1001 | } | ||
1002 | part->stamp = now; | ||
1003 | } | ||
1004 | |||
1005 | /** | ||
1006 | * part_round_stats() - Round off the performance stats on a struct | ||
987 | * disk_stats. | 1007 | * disk_stats. |
988 | * | 1008 | * |
989 | * The average IO queue length and utilisation statistics are maintained | 1009 | * The average IO queue length and utilisation statistics are maintained |
@@ -997,36 +1017,15 @@ static inline void add_request(struct request_queue *q, struct request *req) | |||
997 | * /proc/diskstats. This accounts immediately for all queue usage up to | 1017 | * /proc/diskstats. This accounts immediately for all queue usage up to |
998 | * the current jiffies and restarts the counters again. | 1018 | * the current jiffies and restarts the counters again. |
999 | */ | 1019 | */ |
1000 | void disk_round_stats(struct gendisk *disk) | 1020 | void part_round_stats(int cpu, struct hd_struct *part) |
1001 | { | 1021 | { |
1002 | unsigned long now = jiffies; | 1022 | unsigned long now = jiffies; |
1003 | 1023 | ||
1004 | if (now == disk->stamp) | 1024 | if (part->partno) |
1005 | return; | 1025 | part_round_stats_single(cpu, &part_to_disk(part)->part0, now); |
1006 | 1026 | part_round_stats_single(cpu, part, now); | |
1007 | if (disk->in_flight) { | ||
1008 | __disk_stat_add(disk, time_in_queue, | ||
1009 | disk->in_flight * (now - disk->stamp)); | ||
1010 | __disk_stat_add(disk, io_ticks, (now - disk->stamp)); | ||
1011 | } | ||
1012 | disk->stamp = now; | ||
1013 | } | ||
1014 | EXPORT_SYMBOL_GPL(disk_round_stats); | ||
1015 | |||
1016 | void part_round_stats(struct hd_struct *part) | ||
1017 | { | ||
1018 | unsigned long now = jiffies; | ||
1019 | |||
1020 | if (now == part->stamp) | ||
1021 | return; | ||
1022 | |||
1023 | if (part->in_flight) { | ||
1024 | __part_stat_add(part, time_in_queue, | ||
1025 | part->in_flight * (now - part->stamp)); | ||
1026 | __part_stat_add(part, io_ticks, (now - part->stamp)); | ||
1027 | } | ||
1028 | part->stamp = now; | ||
1029 | } | 1027 | } |
1028 | EXPORT_SYMBOL_GPL(part_round_stats); | ||
1030 | 1029 | ||
1031 | /* | 1030 | /* |
1032 | * queue lock must be held | 1031 | * queue lock must be held |
@@ -1070,6 +1069,7 @@ EXPORT_SYMBOL(blk_put_request); | |||
1070 | 1069 | ||
1071 | void init_request_from_bio(struct request *req, struct bio *bio) | 1070 | void init_request_from_bio(struct request *req, struct bio *bio) |
1072 | { | 1071 | { |
1072 | req->cpu = bio->bi_comp_cpu; | ||
1073 | req->cmd_type = REQ_TYPE_FS; | 1073 | req->cmd_type = REQ_TYPE_FS; |
1074 | 1074 | ||
1075 | /* | 1075 | /* |
@@ -1081,7 +1081,12 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1081 | /* | 1081 | /* |
1082 | * REQ_BARRIER implies no merging, but lets make it explicit | 1082 | * REQ_BARRIER implies no merging, but lets make it explicit |
1083 | */ | 1083 | */ |
1084 | if (unlikely(bio_barrier(bio))) | 1084 | if (unlikely(bio_discard(bio))) { |
1085 | req->cmd_flags |= REQ_DISCARD; | ||
1086 | if (bio_barrier(bio)) | ||
1087 | req->cmd_flags |= REQ_SOFTBARRIER; | ||
1088 | req->q->prepare_discard_fn(req->q, req); | ||
1089 | } else if (unlikely(bio_barrier(bio))) | ||
1085 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); | 1090 | req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); |
1086 | 1091 | ||
1087 | if (bio_sync(bio)) | 1092 | if (bio_sync(bio)) |
@@ -1099,7 +1104,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1099 | static int __make_request(struct request_queue *q, struct bio *bio) | 1104 | static int __make_request(struct request_queue *q, struct bio *bio) |
1100 | { | 1105 | { |
1101 | struct request *req; | 1106 | struct request *req; |
1102 | int el_ret, nr_sectors, barrier, err; | 1107 | int el_ret, nr_sectors, barrier, discard, err; |
1103 | const unsigned short prio = bio_prio(bio); | 1108 | const unsigned short prio = bio_prio(bio); |
1104 | const int sync = bio_sync(bio); | 1109 | const int sync = bio_sync(bio); |
1105 | int rw_flags; | 1110 | int rw_flags; |
@@ -1114,7 +1119,14 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1114 | blk_queue_bounce(q, &bio); | 1119 | blk_queue_bounce(q, &bio); |
1115 | 1120 | ||
1116 | barrier = bio_barrier(bio); | 1121 | barrier = bio_barrier(bio); |
1117 | if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { | 1122 | if (unlikely(barrier) && bio_has_data(bio) && |
1123 | (q->next_ordered == QUEUE_ORDERED_NONE)) { | ||
1124 | err = -EOPNOTSUPP; | ||
1125 | goto end_io; | ||
1126 | } | ||
1127 | |||
1128 | discard = bio_discard(bio); | ||
1129 | if (unlikely(discard) && !q->prepare_discard_fn) { | ||
1118 | err = -EOPNOTSUPP; | 1130 | err = -EOPNOTSUPP; |
1119 | goto end_io; | 1131 | goto end_io; |
1120 | } | 1132 | } |
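Discard bios are only accepted when the queue provides a prepare_discard_fn, which turns the zero-payload request into whatever the hardware needs. A sketch of the driver side; the blk_queue_set_discard() setter name is assumed from the same series, and the body is deliberately left generic:

static int my_prepare_discard(struct request_queue *q, struct request *rq)
{
	/*
	 * rq carries no data pages; translate rq->sector and rq->nr_sectors
	 * into the device's own trim/unmap command here.
	 */
	return 0;	/* 0: request is ready to be dispatched */
}

static void my_enable_discard(struct request_queue *q)
{
	blk_queue_set_discard(q, my_prepare_discard);	/* assumed setter */
}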
@@ -1138,6 +1150,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1138 | req->biotail = bio; | 1150 | req->biotail = bio; |
1139 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 1151 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
1140 | req->ioprio = ioprio_best(req->ioprio, prio); | 1152 | req->ioprio = ioprio_best(req->ioprio, prio); |
1153 | if (!blk_rq_cpu_valid(req)) | ||
1154 | req->cpu = bio->bi_comp_cpu; | ||
1141 | drive_stat_acct(req, 0); | 1155 | drive_stat_acct(req, 0); |
1142 | if (!attempt_back_merge(q, req)) | 1156 | if (!attempt_back_merge(q, req)) |
1143 | elv_merged_request(q, req, el_ret); | 1157 | elv_merged_request(q, req, el_ret); |
@@ -1165,6 +1179,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1165 | req->sector = req->hard_sector = bio->bi_sector; | 1179 | req->sector = req->hard_sector = bio->bi_sector; |
1166 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; | 1180 | req->nr_sectors = req->hard_nr_sectors += nr_sectors; |
1167 | req->ioprio = ioprio_best(req->ioprio, prio); | 1181 | req->ioprio = ioprio_best(req->ioprio, prio); |
1182 | if (!blk_rq_cpu_valid(req)) | ||
1183 | req->cpu = bio->bi_comp_cpu; | ||
1168 | drive_stat_acct(req, 0); | 1184 | drive_stat_acct(req, 0); |
1169 | if (!attempt_front_merge(q, req)) | 1185 | if (!attempt_front_merge(q, req)) |
1170 | elv_merged_request(q, req, el_ret); | 1186 | elv_merged_request(q, req, el_ret); |
@@ -1200,13 +1216,15 @@ get_rq: | |||
1200 | init_request_from_bio(req, bio); | 1216 | init_request_from_bio(req, bio); |
1201 | 1217 | ||
1202 | spin_lock_irq(q->queue_lock); | 1218 | spin_lock_irq(q->queue_lock); |
1219 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | ||
1220 | bio_flagged(bio, BIO_CPU_AFFINE)) | ||
1221 | req->cpu = blk_cpu_to_group(smp_processor_id()); | ||
1203 | if (elv_queue_empty(q)) | 1222 | if (elv_queue_empty(q)) |
1204 | blk_plug_device(q); | 1223 | blk_plug_device(q); |
1205 | add_request(q, req); | 1224 | add_request(q, req); |
1206 | out: | 1225 | out: |
1207 | if (sync) | 1226 | if (sync) |
1208 | __generic_unplug_device(q); | 1227 | __generic_unplug_device(q); |
1209 | |||
1210 | spin_unlock_irq(q->queue_lock); | 1228 | spin_unlock_irq(q->queue_lock); |
1211 | return 0; | 1229 | return 0; |
1212 | 1230 | ||
@@ -1260,8 +1278,9 @@ __setup("fail_make_request=", setup_fail_make_request); | |||
1260 | 1278 | ||
1261 | static int should_fail_request(struct bio *bio) | 1279 | static int should_fail_request(struct bio *bio) |
1262 | { | 1280 | { |
1263 | if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || | 1281 | struct hd_struct *part = bio->bi_bdev->bd_part; |
1264 | (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) | 1282 | |
1283 | if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) | ||
1265 | return should_fail(&fail_make_request, bio->bi_size); | 1284 | return should_fail(&fail_make_request, bio->bi_size); |
1266 | 1285 | ||
1267 | return 0; | 1286 | return 0; |
@@ -1314,7 +1333,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) | |||
1314 | } | 1333 | } |
1315 | 1334 | ||
1316 | /** | 1335 | /** |
1317 | * generic_make_request: hand a buffer to its device driver for I/O | 1336 | * generic_make_request - hand a buffer to its device driver for I/O |
1318 | * @bio: The bio describing the location in memory and on the device. | 1337 | * @bio: The bio describing the location in memory and on the device. |
1319 | * | 1338 | * |
1320 | * generic_make_request() is used to make I/O requests of block | 1339 | * generic_make_request() is used to make I/O requests of block |
@@ -1409,7 +1428,8 @@ end_io: | |||
1409 | 1428 | ||
1410 | if (bio_check_eod(bio, nr_sectors)) | 1429 | if (bio_check_eod(bio, nr_sectors)) |
1411 | goto end_io; | 1430 | goto end_io; |
1412 | if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { | 1431 | if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || |
1432 | (bio_discard(bio) && !q->prepare_discard_fn)) { | ||
1413 | err = -EOPNOTSUPP; | 1433 | err = -EOPNOTSUPP; |
1414 | goto end_io; | 1434 | goto end_io; |
1415 | } | 1435 | } |
@@ -1471,13 +1491,13 @@ void generic_make_request(struct bio *bio) | |||
1471 | EXPORT_SYMBOL(generic_make_request); | 1491 | EXPORT_SYMBOL(generic_make_request); |
1472 | 1492 | ||
1473 | /** | 1493 | /** |
1474 | * submit_bio: submit a bio to the block device layer for I/O | 1494 | * submit_bio - submit a bio to the block device layer for I/O |
1475 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) | 1495 | * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) |
1476 | * @bio: The &struct bio which describes the I/O | 1496 | * @bio: The &struct bio which describes the I/O |
1477 | * | 1497 | * |
1478 | * submit_bio() is very similar in purpose to generic_make_request(), and | 1498 | * submit_bio() is very similar in purpose to generic_make_request(), and |
1479 | * uses that function to do most of the work. Both are fairly rough | 1499 | * uses that function to do most of the work. Both are fairly rough |
1480 | * interfaces, @bio must be presetup and ready for I/O. | 1500 | * interfaces; @bio must be presetup and ready for I/O. |
1481 | * | 1501 | * |
1482 | */ | 1502 | */ |
1483 | void submit_bio(int rw, struct bio *bio) | 1503 | void submit_bio(int rw, struct bio *bio) |
@@ -1490,11 +1510,7 @@ void submit_bio(int rw, struct bio *bio) | |||
1490 | * If it's a regular read/write or a barrier with data attached, | 1510 | * If it's a regular read/write or a barrier with data attached, |
1491 | * go through the normal accounting stuff before submission. | 1511 | * go through the normal accounting stuff before submission. |
1492 | */ | 1512 | */ |
1493 | if (!bio_empty_barrier(bio)) { | 1513 | if (bio_has_data(bio)) { |
1494 | |||
1495 | BIO_BUG_ON(!bio->bi_size); | ||
1496 | BIO_BUG_ON(!bio->bi_io_vec); | ||
1497 | |||
1498 | if (rw & WRITE) { | 1514 | if (rw & WRITE) { |
1499 | count_vm_events(PGPGOUT, count); | 1515 | count_vm_events(PGPGOUT, count); |
1500 | } else { | 1516 | } else { |
@@ -1517,9 +1533,90 @@ void submit_bio(int rw, struct bio *bio) | |||
1517 | EXPORT_SYMBOL(submit_bio); | 1533 | EXPORT_SYMBOL(submit_bio); |
1518 | 1534 | ||
1519 | /** | 1535 | /** |
1536 | * blk_rq_check_limits - Helper function to check a request for the queue limit | ||
1537 | * @q: the queue | ||
1538 | * @rq: the request being checked | ||
1539 | * | ||
1540 | * Description: | ||
1541 | * @rq may have been made based on weaker limitations of upper-level queues | ||
1542 | * in request stacking drivers, and it may violate the limitation of @q. | ||
1543 | * Since the block layer and the underlying device driver trust @rq | ||
1544 | * after it is inserted to @q, it should be checked against @q before | ||
1545 | * the insertion using this generic function. | ||
1546 | * | ||
1547 | * This function should also be useful for request stacking drivers | ||
1548 | * in some cases below, so export this fuction. | ||
1549 | * Request stacking drivers like request-based dm may change the queue | ||
1550 | * limits while requests are in the queue (e.g. dm's table swapping). | ||
1551 | * Such request stacking drivers should check those requests agaist | ||
1552 | * the new queue limits again when they dispatch those requests, | ||
1553 | * although such checkings are also done against the old queue limits | ||
1554 | * when submitting requests. | ||
1555 | */ | ||
1556 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | ||
1557 | { | ||
1558 | if (rq->nr_sectors > q->max_sectors || | ||
1559 | rq->data_len > q->max_hw_sectors << 9) { | ||
1560 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | ||
1561 | return -EIO; | ||
1562 | } | ||
1563 | |||
1564 | /* | ||
1565 | * queue's settings related to segment counting like q->bounce_pfn | ||
1566 | * may differ from that of other stacking queues. | ||
1567 | * Recalculate it to check the request correctly on this queue's | ||
1568 | * limitation. | ||
1569 | */ | ||
1570 | blk_recalc_rq_segments(rq); | ||
1571 | if (rq->nr_phys_segments > q->max_phys_segments || | ||
1572 | rq->nr_phys_segments > q->max_hw_segments) { | ||
1573 | printk(KERN_ERR "%s: over max segments limit.\n", __func__); | ||
1574 | return -EIO; | ||
1575 | } | ||
1576 | |||
1577 | return 0; | ||
1578 | } | ||
1579 | EXPORT_SYMBOL_GPL(blk_rq_check_limits); | ||
1580 | |||
1581 | /** | ||
1582 | * blk_insert_cloned_request - Helper for stacking drivers to submit a request | ||
1583 | * @q: the queue to submit the request | ||
1584 | * @rq: the request being queued | ||
1585 | */ | ||
1586 | int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | ||
1587 | { | ||
1588 | unsigned long flags; | ||
1589 | |||
1590 | if (blk_rq_check_limits(q, rq)) | ||
1591 | return -EIO; | ||
1592 | |||
1593 | #ifdef CONFIG_FAIL_MAKE_REQUEST | ||
1594 | if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && | ||
1595 | should_fail(&fail_make_request, blk_rq_bytes(rq))) | ||
1596 | return -EIO; | ||
1597 | #endif | ||
1598 | |||
1599 | spin_lock_irqsave(q->queue_lock, flags); | ||
1600 | |||
1601 | /* | ||
1602 | * Submitting request must be dequeued before calling this function | ||
1603 | * because it will be linked to another request_queue | ||
1604 | */ | ||
1605 | BUG_ON(blk_queued_rq(rq)); | ||
1606 | |||
1607 | drive_stat_acct(rq, 1); | ||
1608 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | ||
1609 | |||
1610 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
1611 | |||
1612 | return 0; | ||
1613 | } | ||
1614 | EXPORT_SYMBOL_GPL(blk_insert_cloned_request); | ||
1615 | |||
1616 | /** | ||
1520 | * __end_that_request_first - end I/O on a request | 1617 | * __end_that_request_first - end I/O on a request |
1521 | * @req: the request being processed | 1618 | * @req: the request being processed |
1522 | * @error: 0 for success, < 0 for error | 1619 | * @error: %0 for success, < %0 for error |
1523 | * @nr_bytes: number of bytes to complete | 1620 | * @nr_bytes: number of bytes to complete |
1524 | * | 1621 | * |
1525 | * Description: | 1622 | * Description: |
@@ -1527,8 +1624,8 @@ EXPORT_SYMBOL(submit_bio); | |||
1527 | * for the next range of segments (if any) in the cluster. | 1624 | * for the next range of segments (if any) in the cluster. |
1528 | * | 1625 | * |
1529 | * Return: | 1626 | * Return: |
1530 | * 0 - we are done with this request, call end_that_request_last() | 1627 | * %0 - we are done with this request, call end_that_request_last() |
1531 | * 1 - still buffers pending for this request | 1628 | * %1 - still buffers pending for this request |
1532 | **/ | 1629 | **/ |
1533 | static int __end_that_request_first(struct request *req, int error, | 1630 | static int __end_that_request_first(struct request *req, int error, |
1534 | int nr_bytes) | 1631 | int nr_bytes) |
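blk_rq_check_limits() and blk_insert_cloned_request() give request stacking drivers (request-based dm is the intended user) a checked path for queuing an already-built clone on a lower device's queue. A minimal sketch of the dispatch side, with clone construction omitted:

/* hand one fully built, dequeued clone to the chosen underlying queue */
static int my_dispatch_clone(struct request_queue *lower_q, struct request *clone)
{
	int ret;

	/* re-validates size/segment limits, then queues at the back */
	ret = blk_insert_cloned_request(lower_q, clone);
	if (ret)
		/* clone exceeds the lower queue's limits; complete it as failed */
		return ret;

	return 0;
}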
@@ -1539,7 +1636,7 @@ static int __end_that_request_first(struct request *req, int error, | |||
1539 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); | 1636 | blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); |
1540 | 1637 | ||
1541 | /* | 1638 | /* |
1542 | * for a REQ_BLOCK_PC request, we want to carry any eventual | 1639 | * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual |
1543 | * sense key with us all the way through | 1640 | * sense key with us all the way through |
1544 | */ | 1641 | */ |
1545 | if (!blk_pc_request(req)) | 1642 | if (!blk_pc_request(req)) |
@@ -1552,11 +1649,14 @@ static int __end_that_request_first(struct request *req, int error, | |||
1552 | } | 1649 | } |
1553 | 1650 | ||
1554 | if (blk_fs_request(req) && req->rq_disk) { | 1651 | if (blk_fs_request(req) && req->rq_disk) { |
1555 | struct hd_struct *part = get_part(req->rq_disk, req->sector); | ||
1556 | const int rw = rq_data_dir(req); | 1652 | const int rw = rq_data_dir(req); |
1653 | struct hd_struct *part; | ||
1654 | int cpu; | ||
1557 | 1655 | ||
1558 | all_stat_add(req->rq_disk, part, sectors[rw], | 1656 | cpu = part_stat_lock(); |
1559 | nr_bytes >> 9, req->sector); | 1657 | part = disk_map_sector_rcu(req->rq_disk, req->sector); |
1658 | part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); | ||
1659 | part_stat_unlock(); | ||
1560 | } | 1660 | } |
1561 | 1661 | ||
1562 | total_bytes = bio_nbytes = 0; | 1662 | total_bytes = bio_nbytes = 0; |
@@ -1641,88 +1741,14 @@ static int __end_that_request_first(struct request *req, int error, | |||
1641 | } | 1741 | } |
1642 | 1742 | ||
1643 | /* | 1743 | /* |
1644 | * splice the completion data to a local structure and hand off to | ||
1645 | * process_completion_queue() to complete the requests | ||
1646 | */ | ||
1647 | static void blk_done_softirq(struct softirq_action *h) | ||
1648 | { | ||
1649 | struct list_head *cpu_list, local_list; | ||
1650 | |||
1651 | local_irq_disable(); | ||
1652 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
1653 | list_replace_init(cpu_list, &local_list); | ||
1654 | local_irq_enable(); | ||
1655 | |||
1656 | while (!list_empty(&local_list)) { | ||
1657 | struct request *rq; | ||
1658 | |||
1659 | rq = list_entry(local_list.next, struct request, donelist); | ||
1660 | list_del_init(&rq->donelist); | ||
1661 | rq->q->softirq_done_fn(rq); | ||
1662 | } | ||
1663 | } | ||
1664 | |||
1665 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, | ||
1666 | unsigned long action, void *hcpu) | ||
1667 | { | ||
1668 | /* | ||
1669 | * If a CPU goes away, splice its entries to the current CPU | ||
1670 | * and trigger a run of the softirq | ||
1671 | */ | ||
1672 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | ||
1673 | int cpu = (unsigned long) hcpu; | ||
1674 | |||
1675 | local_irq_disable(); | ||
1676 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | ||
1677 | &__get_cpu_var(blk_cpu_done)); | ||
1678 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
1679 | local_irq_enable(); | ||
1680 | } | ||
1681 | |||
1682 | return NOTIFY_OK; | ||
1683 | } | ||
1684 | |||
1685 | |||
1686 | static struct notifier_block blk_cpu_notifier __cpuinitdata = { | ||
1687 | .notifier_call = blk_cpu_notify, | ||
1688 | }; | ||
1689 | |||
1690 | /** | ||
1691 | * blk_complete_request - end I/O on a request | ||
1692 | * @req: the request being processed | ||
1693 | * | ||
1694 | * Description: | ||
1695 | * Ends all I/O on a request. It does not handle partial completions, | ||
1696 | * unless the driver actually implements this in its completion callback | ||
1697 | * through requeueing. The actual completion happens out-of-order, | ||
1698 | * through a softirq handler. The user must have registered a completion | ||
1699 | * callback through blk_queue_softirq_done(). | ||
1700 | **/ | ||
1701 | |||
1702 | void blk_complete_request(struct request *req) | ||
1703 | { | ||
1704 | struct list_head *cpu_list; | ||
1705 | unsigned long flags; | ||
1706 | |||
1707 | BUG_ON(!req->q->softirq_done_fn); | ||
1708 | |||
1709 | local_irq_save(flags); | ||
1710 | |||
1711 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
1712 | list_add_tail(&req->donelist, cpu_list); | ||
1713 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
1714 | |||
1715 | local_irq_restore(flags); | ||
1716 | } | ||
1717 | EXPORT_SYMBOL(blk_complete_request); | ||
1718 | |||
1719 | /* | ||
1720 | * queue lock must be held | 1744 | * queue lock must be held |
1721 | */ | 1745 | */ |
1722 | static void end_that_request_last(struct request *req, int error) | 1746 | static void end_that_request_last(struct request *req, int error) |
1723 | { | 1747 | { |
1724 | struct gendisk *disk = req->rq_disk; | 1748 | struct gendisk *disk = req->rq_disk; |
1725 | 1749 | ||
1750 | blk_delete_timer(req); | ||
1751 | |||
1726 | if (blk_rq_tagged(req)) | 1752 | if (blk_rq_tagged(req)) |
1727 | blk_queue_end_tag(req->q, req); | 1753 | blk_queue_end_tag(req->q, req); |
1728 | 1754 | ||
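The softirq completion path (blk_done_softirq(), the CPU notifier and blk_complete_request()) moves out of blk-core.c into the new blk-softirq.c, where it also learns to raise the softirq on the submitting CPU. The driver-visible usage stays the same; in this sketch my_cmd_error() and my_fetch_completed() are hypothetical:

#include <linux/blkdev.h>
#include <linux/interrupt.h>

/* runs in softirq context, after blk_complete_request() */
static void my_softirq_done(struct request *rq)
{
	int error = my_cmd_error(rq) ? -EIO : 0;	/* hypothetical status check */

	blk_end_request(rq, error, blk_rq_bytes(rq));
}

static irqreturn_t my_isr(int irq, void *data)
{
	struct request *rq = my_fetch_completed(data);	/* hypothetical */

	if (rq)
		blk_complete_request(rq);	/* defers the work to BLOCK_SOFTIRQ */
	return IRQ_HANDLED;
}

static void my_setup_completion(struct request_queue *q)
{
	blk_queue_softirq_done(q, my_softirq_done);
}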
@@ -1740,16 +1766,18 @@ static void end_that_request_last(struct request *req, int error) | |||
1740 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { | 1766 | if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { |
1741 | unsigned long duration = jiffies - req->start_time; | 1767 | unsigned long duration = jiffies - req->start_time; |
1742 | const int rw = rq_data_dir(req); | 1768 | const int rw = rq_data_dir(req); |
1743 | struct hd_struct *part = get_part(disk, req->sector); | 1769 | struct hd_struct *part; |
1744 | 1770 | int cpu; | |
1745 | __all_stat_inc(disk, part, ios[rw], req->sector); | 1771 | |
1746 | __all_stat_add(disk, part, ticks[rw], duration, req->sector); | 1772 | cpu = part_stat_lock(); |
1747 | disk_round_stats(disk); | 1773 | part = disk_map_sector_rcu(disk, req->sector); |
1748 | disk->in_flight--; | 1774 | |
1749 | if (part) { | 1775 | part_stat_inc(cpu, part, ios[rw]); |
1750 | part_round_stats(part); | 1776 | part_stat_add(cpu, part, ticks[rw], duration); |
1751 | part->in_flight--; | 1777 | part_round_stats(cpu, part); |
1752 | } | 1778 | part_dec_in_flight(part); |
1779 | |||
1780 | part_stat_unlock(); | ||
1753 | } | 1781 | } |
1754 | 1782 | ||
1755 | if (req->end_io) | 1783 | if (req->end_io) |
@@ -1762,17 +1790,6 @@ static void end_that_request_last(struct request *req, int error) | |||
1762 | } | 1790 | } |
1763 | } | 1791 | } |
1764 | 1792 | ||
1765 | static inline void __end_request(struct request *rq, int uptodate, | ||
1766 | unsigned int nr_bytes) | ||
1767 | { | ||
1768 | int error = 0; | ||
1769 | |||
1770 | if (uptodate <= 0) | ||
1771 | error = uptodate ? uptodate : -EIO; | ||
1772 | |||
1773 | __blk_end_request(rq, error, nr_bytes); | ||
1774 | } | ||
1775 | |||
1776 | /** | 1793 | /** |
1777 | * blk_rq_bytes - Returns bytes left to complete in the entire request | 1794 | * blk_rq_bytes - Returns bytes left to complete in the entire request |
1778 | * @rq: the request being processed | 1795 | * @rq: the request being processed |
@@ -1803,74 +1820,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq) | |||
1803 | EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); | 1820 | EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); |
1804 | 1821 | ||
1805 | /** | 1822 | /** |
1806 | * end_queued_request - end all I/O on a queued request | ||
1807 | * @rq: the request being processed | ||
1808 | * @uptodate: error value or 0/1 uptodate flag | ||
1809 | * | ||
1810 | * Description: | ||
1811 | * Ends all I/O on a request, and removes it from the block layer queues. | ||
1812 | * Not suitable for normal IO completion, unless the driver still has | ||
1813 | * the request attached to the block layer. | ||
1814 | * | ||
1815 | **/ | ||
1816 | void end_queued_request(struct request *rq, int uptodate) | ||
1817 | { | ||
1818 | __end_request(rq, uptodate, blk_rq_bytes(rq)); | ||
1819 | } | ||
1820 | EXPORT_SYMBOL(end_queued_request); | ||
1821 | |||
1822 | /** | ||
1823 | * end_dequeued_request - end all I/O on a dequeued request | ||
1824 | * @rq: the request being processed | ||
1825 | * @uptodate: error value or 0/1 uptodate flag | ||
1826 | * | ||
1827 | * Description: | ||
1828 | * Ends all I/O on a request. The request must already have been | ||
1829 | * dequeued using blkdev_dequeue_request(), as is normally the case | ||
1830 | * for most drivers. | ||
1831 | * | ||
1832 | **/ | ||
1833 | void end_dequeued_request(struct request *rq, int uptodate) | ||
1834 | { | ||
1835 | __end_request(rq, uptodate, blk_rq_bytes(rq)); | ||
1836 | } | ||
1837 | EXPORT_SYMBOL(end_dequeued_request); | ||
1838 | |||
1839 | |||
1840 | /** | ||
1841 | * end_request - end I/O on the current segment of the request | 1823 | * end_request - end I/O on the current segment of the request |
1842 | * @req: the request being processed | 1824 | * @req: the request being processed |
1843 | * @uptodate: error value or 0/1 uptodate flag | 1825 | * @uptodate: error value or %0/%1 uptodate flag |
1844 | * | 1826 | * |
1845 | * Description: | 1827 | * Description: |
1846 | * Ends I/O on the current segment of a request. If that is the only | 1828 | * Ends I/O on the current segment of a request. If that is the only |
1847 | * remaining segment, the request is also completed and freed. | 1829 | * remaining segment, the request is also completed and freed. |
1848 | * | 1830 | * |
1849 | * This is a remnant of how older block drivers handled IO completions. | 1831 | * This is a remnant of how older block drivers handled I/O completions. |
1850 | * Modern drivers typically end IO on the full request in one go, unless | 1832 | * Modern drivers typically end I/O on the full request in one go, unless |
1851 | * they have a residual value to account for. For that case this function | 1833 | * they have a residual value to account for. For that case this function |
1852 | * isn't really useful, unless the residual just happens to be the | 1834 | * isn't really useful, unless the residual just happens to be the |
1853 | * full current segment. In other words, don't use this function in new | 1835 | * full current segment. In other words, don't use this function in new |
1854 | * code. Either use end_request_completely(), or the | 1836 | * code. Use blk_end_request() or __blk_end_request() to end a request. |
1855 | * end_that_request_chunk() (along with end_that_request_last()) for | ||
1856 | * partial completions. | ||
1857 | * | ||
1858 | **/ | 1837 | **/ |
1859 | void end_request(struct request *req, int uptodate) | 1838 | void end_request(struct request *req, int uptodate) |
1860 | { | 1839 | { |
1861 | __end_request(req, uptodate, req->hard_cur_sectors << 9); | 1840 | int error = 0; |
1841 | |||
1842 | if (uptodate <= 0) | ||
1843 | error = uptodate ? uptodate : -EIO; | ||
1844 | |||
1845 | __blk_end_request(req, error, req->hard_cur_sectors << 9); | ||
1862 | } | 1846 | } |
1863 | EXPORT_SYMBOL(end_request); | 1847 | EXPORT_SYMBOL(end_request); |
1864 | 1848 | ||
1849 | static int end_that_request_data(struct request *rq, int error, | ||
1850 | unsigned int nr_bytes, unsigned int bidi_bytes) | ||
1851 | { | ||
1852 | if (rq->bio) { | ||
1853 | if (__end_that_request_first(rq, error, nr_bytes)) | ||
1854 | return 1; | ||
1855 | |||
1856 | /* Bidi request must be completed as a whole */ | ||
1857 | if (blk_bidi_rq(rq) && | ||
1858 | __end_that_request_first(rq->next_rq, error, bidi_bytes)) | ||
1859 | return 1; | ||
1860 | } | ||
1861 | |||
1862 | return 0; | ||
1863 | } | ||
1864 | |||
1865 | /** | 1865 | /** |
1866 | * blk_end_io - Generic end_io function to complete a request. | 1866 | * blk_end_io - Generic end_io function to complete a request. |
1867 | * @rq: the request being processed | 1867 | * @rq: the request being processed |
1868 | * @error: 0 for success, < 0 for error | 1868 | * @error: %0 for success, < %0 for error |
1869 | * @nr_bytes: number of bytes to complete @rq | 1869 | * @nr_bytes: number of bytes to complete @rq |
1870 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 1870 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
1871 | * @drv_callback: function called between completion of bios in the request | 1871 | * @drv_callback: function called between completion of bios in the request |
1872 | * and completion of the request. | 1872 | * and completion of the request. |
1873 | * If the callback returns non 0, this helper returns without | 1873 | * If the callback returns non %0, this helper returns without |
1874 | * completion of the request. | 1874 | * completion of the request. |
1875 | * | 1875 | * |
1876 | * Description: | 1876 | * Description: |
@@ -1878,8 +1878,8 @@ EXPORT_SYMBOL(end_request); | |||
1878 | * If @rq has leftover, sets it up for the next range of segments. | 1878 | * If @rq has leftover, sets it up for the next range of segments. |
1879 | * | 1879 | * |
1880 | * Return: | 1880 | * Return: |
1881 | * 0 - we are done with this request | 1881 | * %0 - we are done with this request |
1882 | * 1 - this request is not freed yet, it still has pending buffers. | 1882 | * %1 - this request is not freed yet, it still has pending buffers. |
1883 | **/ | 1883 | **/ |
1884 | static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | 1884 | static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, |
1885 | unsigned int bidi_bytes, | 1885 | unsigned int bidi_bytes, |
@@ -1888,15 +1888,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | |||
1888 | struct request_queue *q = rq->q; | 1888 | struct request_queue *q = rq->q; |
1889 | unsigned long flags = 0UL; | 1889 | unsigned long flags = 0UL; |
1890 | 1890 | ||
1891 | if (blk_fs_request(rq) || blk_pc_request(rq)) { | 1891 | if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) |
1892 | if (__end_that_request_first(rq, error, nr_bytes)) | 1892 | return 1; |
1893 | return 1; | ||
1894 | |||
1895 | /* Bidi request must be completed as a whole */ | ||
1896 | if (blk_bidi_rq(rq) && | ||
1897 | __end_that_request_first(rq->next_rq, error, bidi_bytes)) | ||
1898 | return 1; | ||
1899 | } | ||
1900 | 1893 | ||
1901 | /* Special feature for tricky drivers */ | 1894 | /* Special feature for tricky drivers */ |
1902 | if (drv_callback && drv_callback(rq)) | 1895 | if (drv_callback && drv_callback(rq)) |
@@ -1914,7 +1907,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | |||
1914 | /** | 1907 | /** |
1915 | * blk_end_request - Helper function for drivers to complete the request. | 1908 | * blk_end_request - Helper function for drivers to complete the request. |
1916 | * @rq: the request being processed | 1909 | * @rq: the request being processed |
1917 | * @error: 0 for success, < 0 for error | 1910 | * @error: %0 for success, < %0 for error |
1918 | * @nr_bytes: number of bytes to complete | 1911 | * @nr_bytes: number of bytes to complete |
1919 | * | 1912 | * |
1920 | * Description: | 1913 | * Description: |
@@ -1922,8 +1915,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, | |||
1922 | * If @rq has leftover, sets it up for the next range of segments. | 1915 | * If @rq has leftover, sets it up for the next range of segments. |
1923 | * | 1916 | * |
1924 | * Return: | 1917 | * Return: |
1925 | * 0 - we are done with this request | 1918 | * %0 - we are done with this request |
1926 | * 1 - still buffers pending for this request | 1919 | * %1 - still buffers pending for this request |
1927 | **/ | 1920 | **/ |
1928 | int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 1921 | int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
1929 | { | 1922 | { |
@@ -1934,22 +1927,20 @@ EXPORT_SYMBOL_GPL(blk_end_request); | |||
1934 | /** | 1927 | /** |
1935 | * __blk_end_request - Helper function for drivers to complete the request. | 1928 | * __blk_end_request - Helper function for drivers to complete the request. |
1936 | * @rq: the request being processed | 1929 | * @rq: the request being processed |
1937 | * @error: 0 for success, < 0 for error | 1930 | * @error: %0 for success, < %0 for error |
1938 | * @nr_bytes: number of bytes to complete | 1931 | * @nr_bytes: number of bytes to complete |
1939 | * | 1932 | * |
1940 | * Description: | 1933 | * Description: |
1941 | * Must be called with queue lock held unlike blk_end_request(). | 1934 | * Must be called with queue lock held unlike blk_end_request(). |
1942 | * | 1935 | * |
1943 | * Return: | 1936 | * Return: |
1944 | * 0 - we are done with this request | 1937 | * %0 - we are done with this request |
1945 | * 1 - still buffers pending for this request | 1938 | * %1 - still buffers pending for this request |
1946 | **/ | 1939 | **/ |
1947 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) | 1940 | int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) |
1948 | { | 1941 | { |
1949 | if (blk_fs_request(rq) || blk_pc_request(rq)) { | 1942 | if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) |
1950 | if (__end_that_request_first(rq, error, nr_bytes)) | 1943 | return 1; |
1951 | return 1; | ||
1952 | } | ||
1953 | 1944 | ||
1954 | add_disk_randomness(rq->rq_disk); | 1945 | add_disk_randomness(rq->rq_disk); |
1955 | 1946 | ||
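end_queued_request() and end_dequeued_request() are gone, and end_request() is reduced to a wrapper that maps the old 0/1 uptodate convention onto an errno for __blk_end_request(). Migrating a driver from the old idiom looks roughly like this (queue lock held in both cases):

/* old: end_request(rq, 1);  completes only the current segment */

static void my_complete_segment(struct request *rq, int error)
{
	/* new: errno plus an explicit byte count */
	if (__blk_end_request(rq, error, rq->hard_cur_sectors << 9))
		return;		/* buffers still pending; keep the request */

	/* request fully completed and freed by the block layer */
}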
@@ -1962,7 +1953,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request); | |||
1962 | /** | 1953 | /** |
1963 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. | 1954 | * blk_end_bidi_request - Helper function for drivers to complete bidi request. |
1964 | * @rq: the bidi request being processed | 1955 | * @rq: the bidi request being processed |
1965 | * @error: 0 for success, < 0 for error | 1956 | * @error: %0 for success, < %0 for error |
1966 | * @nr_bytes: number of bytes to complete @rq | 1957 | * @nr_bytes: number of bytes to complete @rq |
1967 | * @bidi_bytes: number of bytes to complete @rq->next_rq | 1958 | * @bidi_bytes: number of bytes to complete @rq->next_rq |
1968 | * | 1959 | * |
@@ -1970,8 +1961,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request); | |||
1970 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. | 1961 | * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. |
1971 | * | 1962 | * |
1972 | * Return: | 1963 | * Return: |
1973 | * 0 - we are done with this request | 1964 | * %0 - we are done with this request |
1974 | * 1 - still buffers pending for this request | 1965 | * %1 - still buffers pending for this request |
1975 | **/ | 1966 | **/ |
1976 | int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, | 1967 | int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, |
1977 | unsigned int bidi_bytes) | 1968 | unsigned int bidi_bytes) |
@@ -1981,13 +1972,43 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, | |||
1981 | EXPORT_SYMBOL_GPL(blk_end_bidi_request); | 1972 | EXPORT_SYMBOL_GPL(blk_end_bidi_request); |
1982 | 1973 | ||
1983 | /** | 1974 | /** |
1975 | * blk_update_request - Special helper function for request stacking drivers | ||
1976 | * @rq: the request being processed | ||
1977 | * @error: %0 for success, < %0 for error | ||
1978 | * @nr_bytes: number of bytes to complete @rq | ||
1979 | * | ||
1980 | * Description: | ||
1981 | * Ends I/O on a number of bytes attached to @rq, but doesn't complete | ||
1982 | * the request structure even if @rq doesn't have leftover. | ||
1983 | * If @rq has leftover, sets it up for the next range of segments. | ||
1984 | * | ||
1985 | * This special helper function is only for request stacking drivers | ||
1986 | * (e.g. request-based dm) so that they can handle partial completion. | ||
1987 | * Actual device drivers should use blk_end_request instead. | ||
1988 | */ | ||
1989 | void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) | ||
1990 | { | ||
1991 | if (!end_that_request_data(rq, error, nr_bytes, 0)) { | ||
1992 | /* | ||
1993 | * These members are not updated in end_that_request_data() | ||
1994 | * when all bios are completed. | ||
1995 | * Update them so that the request stacking driver can find | ||
1996 | * how many bytes remain in the request later. | ||
1997 | */ | ||
1998 | rq->nr_sectors = rq->hard_nr_sectors = 0; | ||
1999 | rq->current_nr_sectors = rq->hard_cur_sectors = 0; | ||
2000 | } | ||
2001 | } | ||
2002 | EXPORT_SYMBOL_GPL(blk_update_request); | ||
2003 | |||
2004 | /** | ||
1984 | * blk_end_request_callback - Special helper function for tricky drivers | 2005 | * blk_end_request_callback - Special helper function for tricky drivers |
1985 | * @rq: the request being processed | 2006 | * @rq: the request being processed |
1986 | * @error: 0 for success, < 0 for error | 2007 | * @error: %0 for success, < %0 for error |
1987 | * @nr_bytes: number of bytes to complete | 2008 | * @nr_bytes: number of bytes to complete |
1988 | * @drv_callback: function called between completion of bios in the request | 2009 | * @drv_callback: function called between completion of bios in the request |
1989 | * and completion of the request. | 2010 | * and completion of the request. |
1990 | * If the callback returns non 0, this helper returns without | 2011 | * If the callback returns non %0, this helper returns without |
1991 | * completion of the request. | 2012 | * completion of the request. |
1992 | * | 2013 | * |
1993 | * Description: | 2014 | * Description: |
@@ -2000,10 +2021,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request); | |||
2000 | * Don't use this interface in other places anymore. | 2021 | * Don't use this interface in other places anymore. |
2001 | * | 2022 | * |
2002 | * Return: | 2023 | * Return: |
2003 | * 0 - we are done with this request | 2024 | * %0 - we are done with this request |
2004 | * 1 - this request is not freed yet. | 2025 | * %1 - this request is not freed yet. |
2005 | * this request still has pending buffers or | 2026 | * this request still has pending buffers or |
2006 | * the driver doesn't want to finish this request yet. | 2027 | * the driver doesn't want to finish this request yet. |
2007 | **/ | 2028 | **/ |
2008 | int blk_end_request_callback(struct request *rq, int error, | 2029 | int blk_end_request_callback(struct request *rq, int error, |
2009 | unsigned int nr_bytes, | 2030 | unsigned int nr_bytes, |
@@ -2016,15 +2037,17 @@ EXPORT_SYMBOL_GPL(blk_end_request_callback); | |||
2016 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | 2037 | void blk_rq_bio_prep(struct request_queue *q, struct request *rq, |
2017 | struct bio *bio) | 2038 | struct bio *bio) |
2018 | { | 2039 | { |
2019 | /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ | 2040 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and |
2041 | we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ | ||
2020 | rq->cmd_flags |= (bio->bi_rw & 3); | 2042 | rq->cmd_flags |= (bio->bi_rw & 3); |
2021 | 2043 | ||
2022 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2044 | if (bio_has_data(bio)) { |
2023 | rq->nr_hw_segments = bio_hw_segments(q, bio); | 2045 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
2046 | rq->buffer = bio_data(bio); | ||
2047 | } | ||
2024 | rq->current_nr_sectors = bio_cur_sectors(bio); | 2048 | rq->current_nr_sectors = bio_cur_sectors(bio); |
2025 | rq->hard_cur_sectors = rq->current_nr_sectors; | 2049 | rq->hard_cur_sectors = rq->current_nr_sectors; |
2026 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); | 2050 | rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); |
2027 | rq->buffer = bio_data(bio); | ||
2028 | rq->data_len = bio->bi_size; | 2051 | rq->data_len = bio->bi_size; |
2029 | 2052 | ||
2030 | rq->bio = rq->biotail = bio; | 2053 | rq->bio = rq->biotail = bio; |
@@ -2033,7 +2056,35 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | |||
2033 | rq->rq_disk = bio->bi_bdev->bd_disk; | 2056 | rq->rq_disk = bio->bi_bdev->bd_disk; |
2034 | } | 2057 | } |
2035 | 2058 | ||
2036 | int kblockd_schedule_work(struct work_struct *work) | 2059 | /** |
2060 | * blk_lld_busy - Check if underlying low-level drivers of a device are busy | ||
2061 | * @q : the queue of the device being checked | ||
2062 | * | ||
2063 | * Description: | ||
2064 | * Check if underlying low-level drivers of a device are busy. | ||
2065 | * If the drivers want to export their busy state, they must set their | ||
2066 | * own exporting function using blk_queue_lld_busy() first. | ||
2067 | * | ||
2068 | * Basically, this function is used only by request stacking drivers | ||
2069 | * to stop dispatching requests to underlying devices when underlying | ||
2070 | * devices are busy. This behavior helps more I/O merging on the queue | ||
2071 | * of the request stacking driver and prevents I/O throughput regression | ||
2072 | * on burst I/O load. | ||
2073 | * | ||
2074 | * Return: | ||
2075 | * 0 - Not busy (The request stacking driver should dispatch request) | ||
2076 | * 1 - Busy (The request stacking driver should stop dispatching request) | ||
2077 | */ | ||
2078 | int blk_lld_busy(struct request_queue *q) | ||
2079 | { | ||
2080 | if (q->lld_busy_fn) | ||
2081 | return q->lld_busy_fn(q); | ||
2082 | |||
2083 | return 0; | ||
2084 | } | ||
2085 | EXPORT_SYMBOL_GPL(blk_lld_busy); | ||
2086 | |||
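A hedged sketch of both halves of the lld_busy contract described above; struct my_lld and its in-flight accounting are invented for illustration, only blk_queue_lld_busy() and blk_lld_busy() are interfaces from this patch:

#include <linux/blkdev.h>

/* Hypothetical lower-level driver state. */
struct my_lld {
	unsigned int nr_in_flight;
	unsigned int queue_limit;
};

/* Lower-level driver: report whether the device can take more requests. */
static int my_lld_busy(struct request_queue *q)
{
	struct my_lld *lld = q->queuedata;

	return lld->nr_in_flight >= lld->queue_limit;	/* 1 = busy, 0 = not */
}

static void my_lld_init_queue(struct request_queue *q, struct my_lld *lld)
{
	q->queuedata = lld;
	blk_queue_lld_busy(q, my_lld_busy);
}

/* Stacking driver: hold back dispatch while the lower device is busy. */
static int stacker_should_dispatch(struct request_queue *lower_q)
{
	return !blk_lld_busy(lower_q);
}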
2087 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | ||
2037 | { | 2088 | { |
2038 | return queue_work(kblockd_workqueue, work); | 2089 | return queue_work(kblockd_workqueue, work); |
2039 | } | 2090 | } |
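kblockd_schedule_work() now also takes the queue the work belongs to. A minimal, assumed usage pattern for deferring a queue run from interrupt context; struct my_dev and my_unplug_work() are hypothetical and unplug_work is assumed to be set up with INIT_WORK() at probe time:

#include <linux/blkdev.h>
#include <linux/workqueue.h>

struct my_dev {
	struct request_queue *queue;
	struct work_struct unplug_work;
};

static void my_unplug_work(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, unplug_work);

	blk_run_queue(dev->queue);	/* process context: safe to run the queue */
}

/* Callable from interrupt context: defer the queue run to kblockd. */
static void my_kick_queue(struct my_dev *dev)
{
	kblockd_schedule_work(dev->queue, &dev->unplug_work);
}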
@@ -2047,8 +2098,6 @@ EXPORT_SYMBOL(kblockd_flush_work); | |||
2047 | 2098 | ||
2048 | int __init blk_dev_init(void) | 2099 | int __init blk_dev_init(void) |
2049 | { | 2100 | { |
2050 | int i; | ||
2051 | |||
2052 | kblockd_workqueue = create_workqueue("kblockd"); | 2101 | kblockd_workqueue = create_workqueue("kblockd"); |
2053 | if (!kblockd_workqueue) | 2102 | if (!kblockd_workqueue) |
2054 | panic("Failed to create kblockd\n"); | 2103 | panic("Failed to create kblockd\n"); |
@@ -2059,12 +2108,6 @@ int __init blk_dev_init(void) | |||
2059 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", | 2108 | blk_requestq_cachep = kmem_cache_create("blkdev_queue", |
2060 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); | 2109 | sizeof(struct request_queue), 0, SLAB_PANIC, NULL); |
2061 | 2110 | ||
2062 | for_each_possible_cpu(i) | ||
2063 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | ||
2064 | |||
2065 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); | ||
2066 | register_hotcpu_notifier(&blk_cpu_notifier); | ||
2067 | |||
2068 | return 0; | 2111 | return 0; |
2069 | } | 2112 | } |
2070 | 2113 | ||
diff --git a/block/blk-exec.c b/block/blk-exec.c index 9bceff7674f2..6af716d1e54e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c | |||
@@ -16,7 +16,7 @@ | |||
16 | /** | 16 | /** |
17 | * blk_end_sync_rq - executes a completion event on a request | 17 | * blk_end_sync_rq - executes a completion event on a request |
18 | * @rq: request to complete | 18 | * @rq: request to complete |
19 | * @error: end io status of the request | 19 | * @error: end I/O status of the request |
20 | */ | 20 | */ |
21 | static void blk_end_sync_rq(struct request *rq, int error) | 21 | static void blk_end_sync_rq(struct request *rq, int error) |
22 | { | 22 | { |
@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error) | |||
41 | * @done: I/O completion handler | 41 | * @done: I/O completion handler |
42 | * | 42 | * |
43 | * Description: | 43 | * Description: |
44 | * Insert a fully prepared request at the back of the io scheduler queue | 44 | * Insert a fully prepared request at the back of the I/O scheduler queue |
45 | * for execution. Don't wait for completion. | 45 | * for execution. Don't wait for completion. |
46 | */ | 46 | */ |
47 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | 47 | void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, |
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); | |||
72 | * @at_head: insert request at head or tail of queue | 72 | * @at_head: insert request at head or tail of queue |
73 | * | 73 | * |
74 | * Description: | 74 | * Description: |
75 | * Insert a fully prepared request at the back of the io scheduler queue | 75 | * Insert a fully prepared request at the back of the I/O scheduler queue |
76 | * for execution and wait for completion. | 76 | * for execution and wait for completion. |
77 | */ | 77 | */ |
78 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, | 78 | int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, |
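As a usage sketch of the interfaces documented above, a caller might issue a fully prepared REQ_TYPE_BLOCK_PC request synchronously as below; the TEST UNIT READY opcode and the 30 second timeout are arbitrary choices for illustration, not values from this patch:

#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/jiffies.h>

/* Issue a simple packet command and wait for it to finish (sketch). */
static int issue_test_unit_ready(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, __GFP_WAIT);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = 0x00;		/* TEST UNIT READY, no data transfer */
	rq->cmd_len = 6;
	rq->timeout = 30 * HZ;

	err = blk_execute_rq(q, disk, rq, 0);	/* 0: insert at the tail */
	blk_put_request(rq);

	return err;
}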
diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 3f1a8478cc38..61a8e2f8fdd0 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c | |||
@@ -108,51 +108,51 @@ new_segment: | |||
108 | EXPORT_SYMBOL(blk_rq_map_integrity_sg); | 108 | EXPORT_SYMBOL(blk_rq_map_integrity_sg); |
109 | 109 | ||
110 | /** | 110 | /** |
111 | * blk_integrity_compare - Compare integrity profile of two block devices | 111 | * blk_integrity_compare - Compare integrity profile of two disks |
112 | * @b1: Device to compare | 112 | * @gd1: Disk to compare |
113 | * @b2: Device to compare | 113 | * @gd2: Disk to compare |
114 | * | 114 | * |
115 | * Description: Meta-devices like DM and MD need to verify that all | 115 | * Description: Meta-devices like DM and MD need to verify that all |
116 | * sub-devices use the same integrity format before advertising to | 116 | * sub-devices use the same integrity format before advertising to |
117 | * upper layers that they can send/receive integrity metadata. This | 117 | * upper layers that they can send/receive integrity metadata. This |
118 | * function can be used to check whether two block devices have | 118 | * function can be used to check whether two gendisk devices have |
119 | * compatible integrity formats. | 119 | * compatible integrity formats. |
120 | */ | 120 | */ |
121 | int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2) | 121 | int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) |
122 | { | 122 | { |
123 | struct blk_integrity *b1 = bd1->bd_disk->integrity; | 123 | struct blk_integrity *b1 = gd1->integrity; |
124 | struct blk_integrity *b2 = bd2->bd_disk->integrity; | 124 | struct blk_integrity *b2 = gd2->integrity; |
125 | 125 | ||
126 | BUG_ON(bd1->bd_disk == NULL); | 126 | if (!b1 && !b2) |
127 | BUG_ON(bd2->bd_disk == NULL); | 127 | return 0; |
128 | 128 | ||
129 | if (!b1 || !b2) | 129 | if (!b1 || !b2) |
130 | return 0; | 130 | return -1; |
131 | 131 | ||
132 | if (b1->sector_size != b2->sector_size) { | 132 | if (b1->sector_size != b2->sector_size) { |
133 | printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, | 133 | printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, |
134 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | 134 | gd1->disk_name, gd2->disk_name, |
135 | b1->sector_size, b2->sector_size); | 135 | b1->sector_size, b2->sector_size); |
136 | return -1; | 136 | return -1; |
137 | } | 137 | } |
138 | 138 | ||
139 | if (b1->tuple_size != b2->tuple_size) { | 139 | if (b1->tuple_size != b2->tuple_size) { |
140 | printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, | 140 | printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, |
141 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | 141 | gd1->disk_name, gd2->disk_name, |
142 | b1->tuple_size, b2->tuple_size); | 142 | b1->tuple_size, b2->tuple_size); |
143 | return -1; | 143 | return -1; |
144 | } | 144 | } |
145 | 145 | ||
146 | if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { | 146 | if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { |
147 | printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, | 147 | printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, |
148 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | 148 | gd1->disk_name, gd2->disk_name, |
149 | b1->tag_size, b2->tag_size); | 149 | b1->tag_size, b2->tag_size); |
150 | return -1; | 150 | return -1; |
151 | } | 151 | } |
152 | 152 | ||
153 | if (strcmp(b1->name, b2->name)) { | 153 | if (strcmp(b1->name, b2->name)) { |
154 | printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, | 154 | printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, |
155 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | 155 | gd1->disk_name, gd2->disk_name, |
156 | b1->name, b2->name); | 156 | b1->name, b2->name); |
157 | return -1; | 157 | return -1; |
158 | } | 158 | } |
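With gendisk arguments, a DM/MD-style meta-device can compare member disks directly before advertising integrity support, as the kernel-doc above describes. A hedged sketch; array_integrity_compatible() is a hypothetical helper and assumes CONFIG_BLK_DEV_INTEGRITY is enabled:

#include <linux/blkdev.h>
#include <linux/genhd.h>

/*
 * Check that every member disk shares the same integrity profile as the
 * first one.  blk_integrity_compare() returns 0 on a match and -1 on any
 * mismatch (including one side lacking a profile), per the code above.
 */
static int array_integrity_compatible(struct gendisk **disks, int nr)
{
	int i;

	for (i = 1; i < nr; i++)
		if (blk_integrity_compare(disks[0], disks[i]) < 0)
			return 0;	/* mismatch: do not register a profile */

	return 1;
}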
@@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) | |||
331 | return -1; | 331 | return -1; |
332 | 332 | ||
333 | if (kobject_init_and_add(&bi->kobj, &integrity_ktype, | 333 | if (kobject_init_and_add(&bi->kobj, &integrity_ktype, |
334 | &disk->dev.kobj, "%s", "integrity")) { | 334 | &disk_to_dev(disk)->kobj, |
335 | "%s", "integrity")) { | ||
335 | kmem_cache_free(integrity_cachep, bi); | 336 | kmem_cache_free(integrity_cachep, bi); |
336 | return -1; | 337 | return -1; |
337 | } | 338 | } |
@@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk) | |||
375 | 376 | ||
376 | kobject_uevent(&bi->kobj, KOBJ_REMOVE); | 377 | kobject_uevent(&bi->kobj, KOBJ_REMOVE); |
377 | kobject_del(&bi->kobj); | 378 | kobject_del(&bi->kobj); |
378 | kobject_put(&disk->dev.kobj); | ||
379 | kmem_cache_free(integrity_cachep, bi); | 379 | kmem_cache_free(integrity_cachep, bi); |
380 | disk->integrity = NULL; | ||
380 | } | 381 | } |
381 | EXPORT_SYMBOL(blk_integrity_unregister); | 382 | EXPORT_SYMBOL(blk_integrity_unregister); |
diff --git a/block/blk-map.c b/block/blk-map.c index af37e4ae62f5..4849fa36161e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c | |||
@@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio) | |||
41 | } | 41 | } |
42 | 42 | ||
43 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | 43 | static int __blk_rq_map_user(struct request_queue *q, struct request *rq, |
44 | void __user *ubuf, unsigned int len) | 44 | struct rq_map_data *map_data, void __user *ubuf, |
45 | unsigned int len, int null_mapped, gfp_t gfp_mask) | ||
45 | { | 46 | { |
46 | unsigned long uaddr; | 47 | unsigned long uaddr; |
47 | unsigned int alignment; | ||
48 | struct bio *bio, *orig_bio; | 48 | struct bio *bio, *orig_bio; |
49 | int reading, ret; | 49 | int reading, ret; |
50 | 50 | ||
@@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | |||
55 | * direct dma. else, set up kernel bounce buffers | 55 | * direct dma. else, set up kernel bounce buffers |
56 | */ | 56 | */ |
57 | uaddr = (unsigned long) ubuf; | 57 | uaddr = (unsigned long) ubuf; |
58 | alignment = queue_dma_alignment(q) | q->dma_pad_mask; | 58 | if (blk_rq_aligned(q, ubuf, len) && !map_data) |
59 | if (!(uaddr & alignment) && !(len & alignment)) | 59 | bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); |
60 | bio = bio_map_user(q, NULL, uaddr, len, reading); | ||
61 | else | 60 | else |
62 | bio = bio_copy_user(q, uaddr, len, reading); | 61 | bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); |
63 | 62 | ||
64 | if (IS_ERR(bio)) | 63 | if (IS_ERR(bio)) |
65 | return PTR_ERR(bio); | 64 | return PTR_ERR(bio); |
66 | 65 | ||
66 | if (null_mapped) | ||
67 | bio->bi_flags |= (1 << BIO_NULL_MAPPED); | ||
68 | |||
67 | orig_bio = bio; | 69 | orig_bio = bio; |
68 | blk_queue_bounce(q, &bio); | 70 | blk_queue_bounce(q, &bio); |
69 | 71 | ||
@@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | |||
85 | } | 87 | } |
86 | 88 | ||
87 | /** | 89 | /** |
88 | * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage | 90 | * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage |
89 | * @q: request queue where request should be inserted | 91 | * @q: request queue where request should be inserted |
90 | * @rq: request structure to fill | 92 | * @rq: request structure to fill |
93 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
91 | * @ubuf: the user buffer | 94 | * @ubuf: the user buffer |
92 | * @len: length of user data | 95 | * @len: length of user data |
96 | * @gfp_mask: memory allocation flags | ||
93 | * | 97 | * |
94 | * Description: | 98 | * Description: |
95 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 99 | * Data will be mapped directly for zero copy I/O, if possible. Otherwise |
96 | * a kernel bounce buffer is used. | 100 | * a kernel bounce buffer is used. |
97 | * | 101 | * |
98 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 102 | * A matching blk_rq_unmap_user() must be issued at the end of I/O, while |
99 | * still in process context. | 103 | * still in process context. |
100 | * | 104 | * |
101 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 105 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
@@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, | |||
105 | * unmapping. | 109 | * unmapping. |
106 | */ | 110 | */ |
107 | int blk_rq_map_user(struct request_queue *q, struct request *rq, | 111 | int blk_rq_map_user(struct request_queue *q, struct request *rq, |
108 | void __user *ubuf, unsigned long len) | 112 | struct rq_map_data *map_data, void __user *ubuf, |
113 | unsigned long len, gfp_t gfp_mask) | ||
109 | { | 114 | { |
110 | unsigned long bytes_read = 0; | 115 | unsigned long bytes_read = 0; |
111 | struct bio *bio = NULL; | 116 | struct bio *bio = NULL; |
112 | int ret; | 117 | int ret, null_mapped = 0; |
113 | 118 | ||
114 | if (len > (q->max_hw_sectors << 9)) | 119 | if (len > (q->max_hw_sectors << 9)) |
115 | return -EINVAL; | 120 | return -EINVAL; |
116 | if (!len || !ubuf) | 121 | if (!len) |
117 | return -EINVAL; | 122 | return -EINVAL; |
123 | if (!ubuf) { | ||
124 | if (!map_data || rq_data_dir(rq) != READ) | ||
125 | return -EINVAL; | ||
126 | null_mapped = 1; | ||
127 | } | ||
118 | 128 | ||
119 | while (bytes_read != len) { | 129 | while (bytes_read != len) { |
120 | unsigned long map_len, end, start; | 130 | unsigned long map_len, end, start; |
@@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, | |||
132 | if (end - start > BIO_MAX_PAGES) | 142 | if (end - start > BIO_MAX_PAGES) |
133 | map_len -= PAGE_SIZE; | 143 | map_len -= PAGE_SIZE; |
134 | 144 | ||
135 | ret = __blk_rq_map_user(q, rq, ubuf, map_len); | 145 | ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, |
146 | null_mapped, gfp_mask); | ||
136 | if (ret < 0) | 147 | if (ret < 0) |
137 | goto unmap_rq; | 148 | goto unmap_rq; |
138 | if (!bio) | 149 | if (!bio) |
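A minimal sketch of the updated blk_rq_map_user() calling convention with the new @map_data and @gfp_mask parameters; passing a NULL rq_map_data keeps the previous zero-copy-or-bounce behaviour. map_user_buffer() and the surrounding request handling are assumptions for illustration:

#include <linux/blkdev.h>

static int map_user_buffer(struct request_queue *q, struct request *rq,
			   void __user *ubuf, unsigned long len)
{
	struct bio *bio;
	int ret;

	/* NULL rq_map_data: map the user pages directly when aligned,
	 * otherwise fall back to a bounce copy. */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		return ret;

	bio = rq->bio;		/* keep the original bio for unmapping */

	/* ... blk_execute_rq(q, NULL, rq, 0) or similar would go here ... */

	return blk_rq_unmap_user(bio);	/* must run in process context */
}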
@@ -154,18 +165,20 @@ unmap_rq: | |||
154 | EXPORT_SYMBOL(blk_rq_map_user); | 165 | EXPORT_SYMBOL(blk_rq_map_user); |
155 | 166 | ||
156 | /** | 167 | /** |
157 | * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage | 168 | * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage |
158 | * @q: request queue where request should be inserted | 169 | * @q: request queue where request should be inserted |
159 | * @rq: request to map data to | 170 | * @rq: request to map data to |
171 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
160 | * @iov: pointer to the iovec | 172 | * @iov: pointer to the iovec |
161 | * @iov_count: number of elements in the iovec | 173 | * @iov_count: number of elements in the iovec |
162 | * @len: I/O byte count | 174 | * @len: I/O byte count |
175 | * @gfp_mask: memory allocation flags | ||
163 | * | 176 | * |
164 | * Description: | 177 | * Description: |
165 | * Data will be mapped directly for zero copy io, if possible. Otherwise | 178 | * Data will be mapped directly for zero copy I/O, if possible. Otherwise |
166 | * a kernel bounce buffer is used. | 179 | * a kernel bounce buffer is used. |
167 | * | 180 | * |
168 | * A matching blk_rq_unmap_user() must be issued at the end of io, while | 181 | * A matching blk_rq_unmap_user() must be issued at the end of I/O, while |
169 | * still in process context. | 182 | * still in process context. |
170 | * | 183 | * |
171 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() | 184 | * Note: The mapped bio may need to be bounced through blk_queue_bounce() |
@@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user); | |||
175 | * unmapping. | 188 | * unmapping. |
176 | */ | 189 | */ |
177 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | 190 | int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, |
178 | struct sg_iovec *iov, int iov_count, unsigned int len) | 191 | struct rq_map_data *map_data, struct sg_iovec *iov, |
192 | int iov_count, unsigned int len, gfp_t gfp_mask) | ||
179 | { | 193 | { |
180 | struct bio *bio; | 194 | struct bio *bio; |
181 | int i, read = rq_data_dir(rq) == READ; | 195 | int i, read = rq_data_dir(rq) == READ; |
@@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
193 | } | 207 | } |
194 | } | 208 | } |
195 | 209 | ||
196 | if (unaligned || (q->dma_pad_mask & len)) | 210 | if (unaligned || (q->dma_pad_mask & len) || map_data) |
197 | bio = bio_copy_user_iov(q, iov, iov_count, read); | 211 | bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, |
212 | gfp_mask); | ||
198 | else | 213 | else |
199 | bio = bio_map_user_iov(q, NULL, iov, iov_count, read); | 214 | bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); |
200 | 215 | ||
201 | if (IS_ERR(bio)) | 216 | if (IS_ERR(bio)) |
202 | return PTR_ERR(bio); | 217 | return PTR_ERR(bio); |
@@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
216 | rq->buffer = rq->data = NULL; | 231 | rq->buffer = rq->data = NULL; |
217 | return 0; | 232 | return 0; |
218 | } | 233 | } |
234 | EXPORT_SYMBOL(blk_rq_map_user_iov); | ||
219 | 235 | ||
220 | /** | 236 | /** |
221 | * blk_rq_unmap_user - unmap a request with user data | 237 | * blk_rq_unmap_user - unmap a request with user data |
@@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
224 | * Description: | 240 | * Description: |
225 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must | 241 | * Unmap a rq previously mapped by blk_rq_map_user(). The caller must |
226 | * supply the original rq->bio from the blk_rq_map_user() return, since | 242 | * supply the original rq->bio from the blk_rq_map_user() return, since |
227 | * the io completion may have changed rq->bio. | 243 | * the I/O completion may have changed rq->bio. |
228 | */ | 244 | */ |
229 | int blk_rq_unmap_user(struct bio *bio) | 245 | int blk_rq_unmap_user(struct bio *bio) |
230 | { | 246 | { |
@@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio) | |||
250 | EXPORT_SYMBOL(blk_rq_unmap_user); | 266 | EXPORT_SYMBOL(blk_rq_unmap_user); |
251 | 267 | ||
252 | /** | 268 | /** |
253 | * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage | 269 | * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage |
254 | * @q: request queue where request should be inserted | 270 | * @q: request queue where request should be inserted |
255 | * @rq: request to fill | 271 | * @rq: request to fill |
256 | * @kbuf: the kernel buffer | 272 | * @kbuf: the kernel buffer |
@@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user); | |||
264 | int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, | 280 | int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, |
265 | unsigned int len, gfp_t gfp_mask) | 281 | unsigned int len, gfp_t gfp_mask) |
266 | { | 282 | { |
267 | unsigned long kaddr; | ||
268 | unsigned int alignment; | ||
269 | int reading = rq_data_dir(rq) == READ; | 283 | int reading = rq_data_dir(rq) == READ; |
270 | int do_copy = 0; | 284 | int do_copy = 0; |
271 | struct bio *bio; | 285 | struct bio *bio; |
@@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, | |||
275 | if (!len || !kbuf) | 289 | if (!len || !kbuf) |
276 | return -EINVAL; | 290 | return -EINVAL; |
277 | 291 | ||
278 | kaddr = (unsigned long)kbuf; | 292 | do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf); |
279 | alignment = queue_dma_alignment(q) | q->dma_pad_mask; | ||
280 | do_copy = ((kaddr & alignment) || (len & alignment) || | ||
281 | object_is_on_stack(kbuf)); | ||
282 | |||
283 | if (do_copy) | 293 | if (do_copy) |
284 | bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); | 294 | bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); |
285 | else | 295 | else |
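The open-coded alignment checks removed above are now expressed through blk_rq_aligned(). A small sketch mirroring the copy-vs-direct-map decision blk_rq_map_kern() makes; kbuf_needs_copy() is a hypothetical name:

#include <linux/blkdev.h>
#include <linux/sched.h>	/* object_is_on_stack() */

/*
 * Buffers that violate the queue's DMA alignment or padding mask, or that
 * live on the stack, have to be bounced through a copy.
 */
static int kbuf_needs_copy(struct request_queue *q, void *kbuf,
			   unsigned int len)
{
	return !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
}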
diff --git a/block/blk-merge.c b/block/blk-merge.c index 5efc9e7a68b7..908d3e11ac52 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | void blk_recalc_rq_sectors(struct request *rq, int nsect) | 12 | void blk_recalc_rq_sectors(struct request *rq, int nsect) |
13 | { | 13 | { |
14 | if (blk_fs_request(rq)) { | 14 | if (blk_fs_request(rq) || blk_discard_rq(rq)) { |
15 | rq->hard_sector += nsect; | 15 | rq->hard_sector += nsect; |
16 | rq->hard_nr_sectors -= nsect; | 16 | rq->hard_nr_sectors -= nsect; |
17 | 17 | ||
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) | |||
41 | void blk_recalc_rq_segments(struct request *rq) | 41 | void blk_recalc_rq_segments(struct request *rq) |
42 | { | 42 | { |
43 | int nr_phys_segs; | 43 | int nr_phys_segs; |
44 | int nr_hw_segs; | ||
45 | unsigned int phys_size; | 44 | unsigned int phys_size; |
46 | unsigned int hw_size; | ||
47 | struct bio_vec *bv, *bvprv = NULL; | 45 | struct bio_vec *bv, *bvprv = NULL; |
48 | int seg_size; | 46 | int seg_size; |
49 | int hw_seg_size; | ||
50 | int cluster; | 47 | int cluster; |
51 | struct req_iterator iter; | 48 | struct req_iterator iter; |
52 | int high, highprv = 1; | 49 | int high, highprv = 1; |
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq) | |||
56 | return; | 53 | return; |
57 | 54 | ||
58 | cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); | 55 | cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); |
59 | hw_seg_size = seg_size = 0; | 56 | seg_size = 0; |
60 | phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; | 57 | phys_size = nr_phys_segs = 0; |
61 | rq_for_each_segment(bv, rq, iter) { | 58 | rq_for_each_segment(bv, rq, iter) { |
62 | /* | 59 | /* |
63 | * the trick here is making sure that a high page is never | 60 | * the trick here is making sure that a high page is never |
@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq) | |||
66 | */ | 63 | */ |
67 | high = page_to_pfn(bv->bv_page) > q->bounce_pfn; | 64 | high = page_to_pfn(bv->bv_page) > q->bounce_pfn; |
68 | if (high || highprv) | 65 | if (high || highprv) |
69 | goto new_hw_segment; | 66 | goto new_segment; |
70 | if (cluster) { | 67 | if (cluster) { |
71 | if (seg_size + bv->bv_len > q->max_segment_size) | 68 | if (seg_size + bv->bv_len > q->max_segment_size) |
72 | goto new_segment; | 69 | goto new_segment; |
@@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq) | |||
74 | goto new_segment; | 71 | goto new_segment; |
75 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) | 72 | if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) |
76 | goto new_segment; | 73 | goto new_segment; |
77 | if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | ||
78 | goto new_hw_segment; | ||
79 | 74 | ||
80 | seg_size += bv->bv_len; | 75 | seg_size += bv->bv_len; |
81 | hw_seg_size += bv->bv_len; | ||
82 | bvprv = bv; | 76 | bvprv = bv; |
83 | continue; | 77 | continue; |
84 | } | 78 | } |
85 | new_segment: | 79 | new_segment: |
86 | if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && | ||
87 | !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) | ||
88 | hw_seg_size += bv->bv_len; | ||
89 | else { | ||
90 | new_hw_segment: | ||
91 | if (nr_hw_segs == 1 && | ||
92 | hw_seg_size > rq->bio->bi_hw_front_size) | ||
93 | rq->bio->bi_hw_front_size = hw_seg_size; | ||
94 | hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; | ||
95 | nr_hw_segs++; | ||
96 | } | ||
97 | |||
98 | nr_phys_segs++; | 80 | nr_phys_segs++; |
99 | bvprv = bv; | 81 | bvprv = bv; |
100 | seg_size = bv->bv_len; | 82 | seg_size = bv->bv_len; |
101 | highprv = high; | 83 | highprv = high; |
102 | } | 84 | } |
103 | 85 | ||
104 | if (nr_hw_segs == 1 && | ||
105 | hw_seg_size > rq->bio->bi_hw_front_size) | ||
106 | rq->bio->bi_hw_front_size = hw_seg_size; | ||
107 | if (hw_seg_size > rq->biotail->bi_hw_back_size) | ||
108 | rq->biotail->bi_hw_back_size = hw_seg_size; | ||
109 | rq->nr_phys_segments = nr_phys_segs; | 86 | rq->nr_phys_segments = nr_phys_segs; |
110 | rq->nr_hw_segments = nr_hw_segs; | ||
111 | } | 87 | } |
112 | 88 | ||
113 | void blk_recount_segments(struct request_queue *q, struct bio *bio) | 89 | void blk_recount_segments(struct request_queue *q, struct bio *bio) |
@@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) | |||
120 | blk_recalc_rq_segments(&rq); | 96 | blk_recalc_rq_segments(&rq); |
121 | bio->bi_next = nxt; | 97 | bio->bi_next = nxt; |
122 | bio->bi_phys_segments = rq.nr_phys_segments; | 98 | bio->bi_phys_segments = rq.nr_phys_segments; |
123 | bio->bi_hw_segments = rq.nr_hw_segments; | ||
124 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 99 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
125 | } | 100 | } |
126 | EXPORT_SYMBOL(blk_recount_segments); | 101 | EXPORT_SYMBOL(blk_recount_segments); |
@@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | |||
131 | if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) | 106 | if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) |
132 | return 0; | 107 | return 0; |
133 | 108 | ||
134 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | ||
135 | return 0; | ||
136 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) | 109 | if (bio->bi_size + nxt->bi_size > q->max_segment_size) |
137 | return 0; | 110 | return 0; |
138 | 111 | ||
112 | if (!bio_has_data(bio)) | ||
113 | return 1; | ||
114 | |||
115 | if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) | ||
116 | return 0; | ||
117 | |||
139 | /* | 118 | /* |
140 | * bio and nxt are contigous in memory, check if the queue allows | 119 | * bio and nxt are contiguous in memory; check if the queue allows |
141 | * these two to be merged into one | 120 | * these two to be merged into one |
142 | */ | 121 | */ |
143 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) | 122 | if (BIO_SEG_BOUNDARY(q, bio, nxt)) |
@@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, | |||
146 | return 0; | 125 | return 0; |
147 | } | 126 | } |
148 | 127 | ||
149 | static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, | ||
150 | struct bio *nxt) | ||
151 | { | ||
152 | if (!bio_flagged(bio, BIO_SEG_VALID)) | ||
153 | blk_recount_segments(q, bio); | ||
154 | if (!bio_flagged(nxt, BIO_SEG_VALID)) | ||
155 | blk_recount_segments(q, nxt); | ||
156 | if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || | ||
157 | BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) | ||
158 | return 0; | ||
159 | if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) | ||
160 | return 0; | ||
161 | |||
162 | return 1; | ||
163 | } | ||
164 | |||
165 | /* | 128 | /* |
166 | * map a request to scatterlist, return number of sg entries setup. Caller | 129 | * map a request to scatterlist, return number of sg entries setup. Caller |
167 | * must make sure sg can hold rq->nr_phys_segments entries | 130 | * must make sure sg can hold rq->nr_phys_segments entries |
@@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q, | |||
275 | struct request *req, | 238 | struct request *req, |
276 | struct bio *bio) | 239 | struct bio *bio) |
277 | { | 240 | { |
278 | int nr_hw_segs = bio_hw_segments(q, bio); | ||
279 | int nr_phys_segs = bio_phys_segments(q, bio); | 241 | int nr_phys_segs = bio_phys_segments(q, bio); |
280 | 242 | ||
281 | if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments | 243 | if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments |
282 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { | 244 | || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { |
283 | req->cmd_flags |= REQ_NOMERGE; | 245 | req->cmd_flags |= REQ_NOMERGE; |
284 | if (req == q->last_merge) | 246 | if (req == q->last_merge) |
@@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q, | |||
290 | * This will form the start of a new hw segment. Bump both | 252 | * This will form the start of a new hw segment. Bump both |
291 | * counters. | 253 | * counters. |
292 | */ | 254 | */ |
293 | req->nr_hw_segments += nr_hw_segs; | ||
294 | req->nr_phys_segments += nr_phys_segs; | 255 | req->nr_phys_segments += nr_phys_segs; |
295 | return 1; | 256 | return 1; |
296 | } | 257 | } |
@@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, | |||
299 | struct bio *bio) | 260 | struct bio *bio) |
300 | { | 261 | { |
301 | unsigned short max_sectors; | 262 | unsigned short max_sectors; |
302 | int len; | ||
303 | 263 | ||
304 | if (unlikely(blk_pc_request(req))) | 264 | if (unlikely(blk_pc_request(req))) |
305 | max_sectors = q->max_hw_sectors; | 265 | max_sectors = q->max_hw_sectors; |
@@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, | |||
316 | blk_recount_segments(q, req->biotail); | 276 | blk_recount_segments(q, req->biotail); |
317 | if (!bio_flagged(bio, BIO_SEG_VALID)) | 277 | if (!bio_flagged(bio, BIO_SEG_VALID)) |
318 | blk_recount_segments(q, bio); | 278 | blk_recount_segments(q, bio); |
319 | len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; | ||
320 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) | ||
321 | && !BIOVEC_VIRT_OVERSIZE(len)) { | ||
322 | int mergeable = ll_new_mergeable(q, req, bio); | ||
323 | |||
324 | if (mergeable) { | ||
325 | if (req->nr_hw_segments == 1) | ||
326 | req->bio->bi_hw_front_size = len; | ||
327 | if (bio->bi_hw_segments == 1) | ||
328 | bio->bi_hw_back_size = len; | ||
329 | } | ||
330 | return mergeable; | ||
331 | } | ||
332 | 279 | ||
333 | return ll_new_hw_segment(q, req, bio); | 280 | return ll_new_hw_segment(q, req, bio); |
334 | } | 281 | } |
@@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, | |||
337 | struct bio *bio) | 284 | struct bio *bio) |
338 | { | 285 | { |
339 | unsigned short max_sectors; | 286 | unsigned short max_sectors; |
340 | int len; | ||
341 | 287 | ||
342 | if (unlikely(blk_pc_request(req))) | 288 | if (unlikely(blk_pc_request(req))) |
343 | max_sectors = q->max_hw_sectors; | 289 | max_sectors = q->max_hw_sectors; |
@@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, | |||
351 | q->last_merge = NULL; | 297 | q->last_merge = NULL; |
352 | return 0; | 298 | return 0; |
353 | } | 299 | } |
354 | len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; | ||
355 | if (!bio_flagged(bio, BIO_SEG_VALID)) | 300 | if (!bio_flagged(bio, BIO_SEG_VALID)) |
356 | blk_recount_segments(q, bio); | 301 | blk_recount_segments(q, bio); |
357 | if (!bio_flagged(req->bio, BIO_SEG_VALID)) | 302 | if (!bio_flagged(req->bio, BIO_SEG_VALID)) |
358 | blk_recount_segments(q, req->bio); | 303 | blk_recount_segments(q, req->bio); |
359 | if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && | ||
360 | !BIOVEC_VIRT_OVERSIZE(len)) { | ||
361 | int mergeable = ll_new_mergeable(q, req, bio); | ||
362 | |||
363 | if (mergeable) { | ||
364 | if (bio->bi_hw_segments == 1) | ||
365 | bio->bi_hw_front_size = len; | ||
366 | if (req->nr_hw_segments == 1) | ||
367 | req->biotail->bi_hw_back_size = len; | ||
368 | } | ||
369 | return mergeable; | ||
370 | } | ||
371 | 304 | ||
372 | return ll_new_hw_segment(q, req, bio); | 305 | return ll_new_hw_segment(q, req, bio); |
373 | } | 306 | } |
@@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | |||
376 | struct request *next) | 309 | struct request *next) |
377 | { | 310 | { |
378 | int total_phys_segments; | 311 | int total_phys_segments; |
379 | int total_hw_segments; | ||
380 | 312 | ||
381 | /* | 313 | /* |
382 | * First check if the either of the requests are re-queued | 314 | * First check if the either of the requests are re-queued |
@@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | |||
398 | if (total_phys_segments > q->max_phys_segments) | 330 | if (total_phys_segments > q->max_phys_segments) |
399 | return 0; | 331 | return 0; |
400 | 332 | ||
401 | total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; | 333 | if (total_phys_segments > q->max_hw_segments) |
402 | if (blk_hw_contig_segment(q, req->biotail, next->bio)) { | ||
403 | int len = req->biotail->bi_hw_back_size + | ||
404 | next->bio->bi_hw_front_size; | ||
405 | /* | ||
406 | * propagate the combined length to the end of the requests | ||
407 | */ | ||
408 | if (req->nr_hw_segments == 1) | ||
409 | req->bio->bi_hw_front_size = len; | ||
410 | if (next->nr_hw_segments == 1) | ||
411 | next->biotail->bi_hw_back_size = len; | ||
412 | total_hw_segments--; | ||
413 | } | ||
414 | |||
415 | if (total_hw_segments > q->max_hw_segments) | ||
416 | return 0; | 334 | return 0; |
417 | 335 | ||
418 | /* Merge is OK... */ | 336 | /* Merge is OK... */ |
419 | req->nr_phys_segments = total_phys_segments; | 337 | req->nr_phys_segments = total_phys_segments; |
420 | req->nr_hw_segments = total_hw_segments; | ||
421 | return 1; | 338 | return 1; |
422 | } | 339 | } |
423 | 340 | ||
@@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
470 | elv_merge_requests(q, req, next); | 387 | elv_merge_requests(q, req, next); |
471 | 388 | ||
472 | if (req->rq_disk) { | 389 | if (req->rq_disk) { |
473 | struct hd_struct *part | 390 | struct hd_struct *part; |
474 | = get_part(req->rq_disk, req->sector); | 391 | int cpu; |
475 | disk_round_stats(req->rq_disk); | 392 | |
476 | req->rq_disk->in_flight--; | 393 | cpu = part_stat_lock(); |
477 | if (part) { | 394 | part = disk_map_sector_rcu(req->rq_disk, req->sector); |
478 | part_round_stats(part); | 395 | |
479 | part->in_flight--; | 396 | part_round_stats(cpu, part); |
480 | } | 397 | part_dec_in_flight(part); |
398 | |||
399 | part_stat_unlock(); | ||
481 | } | 400 | } |
482 | 401 | ||
483 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); | 402 | req->ioprio = ioprio_best(req->ioprio, next->ioprio); |
403 | if (blk_rq_cpu_valid(next)) | ||
404 | req->cpu = next->cpu; | ||
484 | 405 | ||
485 | __blk_put_request(q, next); | 406 | __blk_put_request(q, next); |
486 | return 1; | 407 | return 1; |
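attempt_merge() now uses the per-cpu partition statistics API together with disk_map_sector_rcu(). The sketch below repeats the same lock/lookup/unlock pattern in a hypothetical completion-accounting helper; the stat helpers are the ones used in the hunk above, while account_io_done() and its arguments are assumptions (@rw must be 0 for reads, 1 for writes):

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* Account one completed I/O against the partition containing @sector. */
static void account_io_done(struct gendisk *disk, sector_t sector, int rw)
{
	struct hd_struct *part;
	int cpu;

	cpu = part_stat_lock();		/* per-cpu stats + RCU-protected lookup */
	part = disk_map_sector_rcu(disk, sector);

	part_stat_inc(cpu, part, ios[rw]);
	part_round_stats(cpu, part);
	part_dec_in_flight(part);

	part_stat_unlock();
}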
diff --git a/block/blk-settings.c b/block/blk-settings.c index dfc77012843f..b21dcdb64151 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) | |||
33 | EXPORT_SYMBOL(blk_queue_prep_rq); | 33 | EXPORT_SYMBOL(blk_queue_prep_rq); |
34 | 34 | ||
35 | /** | 35 | /** |
36 | * blk_queue_set_discard - set a discard_sectors function for queue | ||
37 | * @q: queue | ||
38 | * @dfn: prepare_discard function | ||
39 | * | ||
40 | * It's possible for a queue to register a discard callback which is used | ||
41 | * to transform a discard request into the appropriate type for the | ||
42 | * hardware. If none is registered, then discard requests are failed | ||
43 | * with %EOPNOTSUPP. | ||
44 | * | ||
45 | */ | ||
46 | void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) | ||
47 | { | ||
48 | q->prepare_discard_fn = dfn; | ||
49 | } | ||
50 | EXPORT_SYMBOL(blk_queue_set_discard); | ||
51 | |||
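A hedged sketch of a driver registering a discard preparation callback as described above; my_prepare_discard() is hypothetical, and the prepare_discard_fn signature and its 0-on-success return are assumed from the setter rather than spelled out in this hunk:

#include <linux/blkdev.h>

/*
 * Transform a generic discard request into whatever the hardware expects,
 * e.g. by filling in a vendor-specific trim command.  Per the kernel-doc
 * above, discards only fail with EOPNOTSUPP when no callback is registered.
 */
static int my_prepare_discard(struct request_queue *q, struct request *rq)
{
	/* ...set up rq->cmd / rq->special for the device's trim command... */
	return 0;	/* assumed: 0 means the request was prepared */
}

static void my_init_discard(struct request_queue *q)
{
	blk_queue_set_discard(q, my_prepare_discard);
}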
52 | /** | ||
36 | * blk_queue_merge_bvec - set a merge_bvec function for queue | 53 | * blk_queue_merge_bvec - set a merge_bvec function for queue |
37 | * @q: queue | 54 | * @q: queue |
38 | * @mbfn: merge_bvec_fn | 55 | * @mbfn: merge_bvec_fn |
@@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) | |||
60 | } | 77 | } |
61 | EXPORT_SYMBOL(blk_queue_softirq_done); | 78 | EXPORT_SYMBOL(blk_queue_softirq_done); |
62 | 79 | ||
80 | void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) | ||
81 | { | ||
82 | q->rq_timeout = timeout; | ||
83 | } | ||
84 | EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); | ||
85 | |||
86 | void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) | ||
87 | { | ||
88 | q->rq_timed_out_fn = fn; | ||
89 | } | ||
90 | EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); | ||
91 | |||
92 | void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) | ||
93 | { | ||
94 | q->lld_busy_fn = fn; | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(blk_queue_lld_busy); | ||
97 | |||
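A sketch of a driver wiring up the new timeout hooks; the 30 second value and my_eh_timed_out()'s recovery logic are assumptions. BLK_EH_HANDLED appears in blk-timeout.c later in this patch; BLK_EH_RESET_TIMER is assumed to be the companion value for re-arming the timer:

#include <linux/blkdev.h>
#include <linux/jiffies.h>

/* Hypothetical: ask the hardware whether the command is still making progress. */
static int my_cmd_still_running(struct request *rq)
{
	return 0;
}

/* Called by the block layer when a request exceeds q->rq_timeout. */
static enum blk_eh_timer_return my_eh_timed_out(struct request *rq)
{
	if (my_cmd_still_running(rq))
		return BLK_EH_RESET_TIMER;	/* give it another rq_timeout */

	/* abort/recover the command here; the block layer then completes it */
	return BLK_EH_HANDLED;
}

static void my_init_timeouts(struct request_queue *q)
{
	blk_queue_rq_timeout(q, 30 * HZ);	/* arbitrary per-request deadline */
	blk_queue_rq_timed_out(q, my_eh_timed_out);
}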
63 | /** | 98 | /** |
64 | * blk_queue_make_request - define an alternate make_request function for a device | 99 | * blk_queue_make_request - define an alternate make_request function for a device |
65 | * @q: the request queue for the device to be affected | 100 | * @q: the request queue for the device to be affected |
@@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request); | |||
127 | * Different hardware can have different requirements as to what pages | 162 | * Different hardware can have different requirements as to what pages |
128 | * it can do I/O directly to. A low level driver can call | 163 | * it can do I/O directly to. A low level driver can call |
129 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce | 164 | * blk_queue_bounce_limit to have lower memory pages allocated as bounce |
130 | * buffers for doing I/O to pages residing above @page. | 165 | * buffers for doing I/O to pages residing above @dma_addr. |
131 | **/ | 166 | **/ |
132 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) | 167 | void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) |
133 | { | 168 | { |
@@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments); | |||
212 | * Description: | 247 | * Description: |
213 | * Enables a low level driver to set an upper limit on the number of | 248 | * Enables a low level driver to set an upper limit on the number of |
214 | * hw data segments in a request. This would be the largest number of | 249 | * hw data segments in a request. This would be the largest number of |
215 | * address/length pairs the host adapter can actually give as once | 250 | * address/length pairs the host adapter can actually give at once |
216 | * to the device. | 251 | * to the device. |
217 | **/ | 252 | **/ |
218 | void blk_queue_max_hw_segments(struct request_queue *q, | 253 | void blk_queue_max_hw_segments(struct request_queue *q, |
@@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary); | |||
393 | * @mask: alignment mask | 428 | * @mask: alignment mask |
394 | * | 429 | * |
395 | * description: | 430 | * description: |
396 | * set required memory and length aligment for direct dma transactions. | 431 | * set required memory and length alignment for direct dma transactions. |
397 | * this is used when building direct io requests for the queue. | 432 | * this is used when building direct io requests for the queue. |
398 | * | 433 | * |
399 | **/ | 434 | **/ |
@@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); | |||
409 | * @mask: alignment mask | 444 | * @mask: alignment mask |
410 | * | 445 | * |
411 | * description: | 446 | * description: |
412 | * update required memory and length aligment for direct dma transactions. | 447 | * update required memory and length alignment for direct dma transactions. |
413 | * If the requested alignment is larger than the current alignment, then | 448 | * If the requested alignment is larger than the current alignment, then |
414 | * the current queue alignment is updated to the new value, otherwise it | 449 | * the current queue alignment is updated to the new value, otherwise it |
415 | * is left alone. The design of this is to allow multiple objects | 450 | * is left alone. The design of this is to allow multiple objects |
diff --git a/block/blk-softirq.c b/block/blk-softirq.c new file mode 100644 index 000000000000..e660d26ca656 --- /dev/null +++ b/block/blk-softirq.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * Functions related to softirq rq completions | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/bio.h> | ||
8 | #include <linux/blkdev.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/cpu.h> | ||
11 | |||
12 | #include "blk.h" | ||
13 | |||
14 | static DEFINE_PER_CPU(struct list_head, blk_cpu_done); | ||
15 | |||
16 | /* | ||
17 | * Softirq action handler - move entries to local list and loop over them | ||
18 | * while passing them to the queue registered handler. | ||
19 | */ | ||
20 | static void blk_done_softirq(struct softirq_action *h) | ||
21 | { | ||
22 | struct list_head *cpu_list, local_list; | ||
23 | |||
24 | local_irq_disable(); | ||
25 | cpu_list = &__get_cpu_var(blk_cpu_done); | ||
26 | list_replace_init(cpu_list, &local_list); | ||
27 | local_irq_enable(); | ||
28 | |||
29 | while (!list_empty(&local_list)) { | ||
30 | struct request *rq; | ||
31 | |||
32 | rq = list_entry(local_list.next, struct request, csd.list); | ||
33 | list_del_init(&rq->csd.list); | ||
34 | rq->q->softirq_done_fn(rq); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | #if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
39 | static void trigger_softirq(void *data) | ||
40 | { | ||
41 | struct request *rq = data; | ||
42 | unsigned long flags; | ||
43 | struct list_head *list; | ||
44 | |||
45 | local_irq_save(flags); | ||
46 | list = &__get_cpu_var(blk_cpu_done); | ||
47 | list_add_tail(&rq->csd.list, list); | ||
48 | |||
49 | if (list->next == &rq->csd.list) | ||
50 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
51 | |||
52 | local_irq_restore(flags); | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Setup and invoke a run of 'trigger_softirq' on the given cpu. | ||
57 | */ | ||
58 | static int raise_blk_irq(int cpu, struct request *rq) | ||
59 | { | ||
60 | if (cpu_online(cpu)) { | ||
61 | struct call_single_data *data = &rq->csd; | ||
62 | |||
63 | data->func = trigger_softirq; | ||
64 | data->info = rq; | ||
65 | data->flags = 0; | ||
66 | |||
67 | __smp_call_function_single(cpu, data); | ||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | return 1; | ||
72 | } | ||
73 | #else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ | ||
74 | static int raise_blk_irq(int cpu, struct request *rq) | ||
75 | { | ||
76 | return 1; | ||
77 | } | ||
78 | #endif | ||
79 | |||
80 | static int __cpuinit blk_cpu_notify(struct notifier_block *self, | ||
81 | unsigned long action, void *hcpu) | ||
82 | { | ||
83 | /* | ||
84 | * If a CPU goes away, splice its entries to the current CPU | ||
85 | * and trigger a run of the softirq | ||
86 | */ | ||
87 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | ||
88 | int cpu = (unsigned long) hcpu; | ||
89 | |||
90 | local_irq_disable(); | ||
91 | list_splice_init(&per_cpu(blk_cpu_done, cpu), | ||
92 | &__get_cpu_var(blk_cpu_done)); | ||
93 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
94 | local_irq_enable(); | ||
95 | } | ||
96 | |||
97 | return NOTIFY_OK; | ||
98 | } | ||
99 | |||
100 | static struct notifier_block __cpuinitdata blk_cpu_notifier = { | ||
101 | .notifier_call = blk_cpu_notify, | ||
102 | }; | ||
103 | |||
104 | void __blk_complete_request(struct request *req) | ||
105 | { | ||
106 | struct request_queue *q = req->q; | ||
107 | unsigned long flags; | ||
108 | int ccpu, cpu, group_cpu; | ||
109 | |||
110 | BUG_ON(!q->softirq_done_fn); | ||
111 | |||
112 | local_irq_save(flags); | ||
113 | cpu = smp_processor_id(); | ||
114 | group_cpu = blk_cpu_to_group(cpu); | ||
115 | |||
116 | /* | ||
117 | * Select completion CPU | ||
118 | */ | ||
119 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) | ||
120 | ccpu = req->cpu; | ||
121 | else | ||
122 | ccpu = cpu; | ||
123 | |||
124 | if (ccpu == cpu || ccpu == group_cpu) { | ||
125 | struct list_head *list; | ||
126 | do_local: | ||
127 | list = &__get_cpu_var(blk_cpu_done); | ||
128 | list_add_tail(&req->csd.list, list); | ||
129 | |||
130 | /* | ||
131 | * if the list only contains our just added request, | ||
132 | * signal a raise of the softirq. If there are already | ||
133 | * entries there, someone already raised the irq but it | ||
134 | * hasn't run yet. | ||
135 | */ | ||
136 | if (list->next == &req->csd.list) | ||
137 | raise_softirq_irqoff(BLOCK_SOFTIRQ); | ||
138 | } else if (raise_blk_irq(ccpu, req)) | ||
139 | goto do_local; | ||
140 | |||
141 | local_irq_restore(flags); | ||
142 | } | ||
143 | |||
144 | /** | ||
145 | * blk_complete_request - end I/O on a request | ||
146 | * @req: the request being processed | ||
147 | * | ||
148 | * Description: | ||
149 | * Ends all I/O on a request. It does not handle partial completions, | ||
150 | * unless the driver actually implements this in its completion callback | ||
151 | * through requeueing. The actual completion happens out-of-order, | ||
152 | * through a softirq handler. The user must have registered a completion | ||
153 | * callback through blk_queue_softirq_done(). | ||
154 | **/ | ||
155 | void blk_complete_request(struct request *req) | ||
156 | { | ||
157 | if (unlikely(blk_should_fake_timeout(req->q))) | ||
158 | return; | ||
159 | if (!blk_mark_rq_complete(req)) | ||
160 | __blk_complete_request(req); | ||
161 | } | ||
162 | EXPORT_SYMBOL(blk_complete_request); | ||
163 | |||
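Putting the two halves together, a driver is expected to register a softirq_done handler at init time and then call blk_complete_request() from its hard-IRQ path. A hedged sketch; my_softirq_done(), the -EIO mapping and the use of blk_rq_bytes() for the remaining byte count are illustrative assumptions:

#include <linux/blkdev.h>

/* Runs in BLOCK_SOFTIRQ context on the CPU chosen by __blk_complete_request(). */
static void my_softirq_done(struct request *rq)
{
	int error = rq->errors ? -EIO : 0;

	/* finish the whole request; blk_end_request() takes the queue lock itself */
	blk_end_request(rq, error, blk_rq_bytes(rq));
}

static void my_init_completion(struct request_queue *q)
{
	blk_queue_softirq_done(q, my_softirq_done);
}

/* Hard-IRQ handler: hand the finished request off to the softirq machinery. */
static void my_isr_complete(struct request *rq)
{
	blk_complete_request(rq);
}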
164 | __init int blk_softirq_init(void) | ||
165 | { | ||
166 | int i; | ||
167 | |||
168 | for_each_possible_cpu(i) | ||
169 | INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); | ||
170 | |||
171 | open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); | ||
172 | register_hotcpu_notifier(&blk_cpu_notifier); | ||
173 | return 0; | ||
174 | } | ||
175 | subsys_initcall(blk_softirq_init); | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73ab821..21e275d7eed9 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, | |||
156 | return ret; | 156 | return ret; |
157 | } | 157 | } |
158 | 158 | ||
159 | static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) | ||
160 | { | ||
161 | unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); | ||
162 | |||
163 | return queue_var_show(set != 0, page); | ||
164 | } | ||
165 | |||
166 | static ssize_t | ||
167 | queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) | ||
168 | { | ||
169 | ssize_t ret = -EINVAL; | ||
170 | #if defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
171 | unsigned long val; | ||
172 | |||
173 | ret = queue_var_store(&val, page, count); | ||
174 | spin_lock_irq(q->queue_lock); | ||
175 | if (val) | ||
176 | queue_flag_set(QUEUE_FLAG_SAME_COMP, q); | ||
177 | else | ||
178 | queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); | ||
179 | spin_unlock_irq(q->queue_lock); | ||
180 | #endif | ||
181 | return ret; | ||
182 | } | ||
159 | 183 | ||
160 | static struct queue_sysfs_entry queue_requests_entry = { | 184 | static struct queue_sysfs_entry queue_requests_entry = { |
161 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, | 185 | .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, |
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = { | |||
197 | .store = queue_nomerges_store, | 221 | .store = queue_nomerges_store, |
198 | }; | 222 | }; |
199 | 223 | ||
224 | static struct queue_sysfs_entry queue_rq_affinity_entry = { | ||
225 | .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, | ||
226 | .show = queue_rq_affinity_show, | ||
227 | .store = queue_rq_affinity_store, | ||
228 | }; | ||
229 | |||
200 | static struct attribute *default_attrs[] = { | 230 | static struct attribute *default_attrs[] = { |
201 | &queue_requests_entry.attr, | 231 | &queue_requests_entry.attr, |
202 | &queue_ra_entry.attr, | 232 | &queue_ra_entry.attr, |
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = { | |||
205 | &queue_iosched_entry.attr, | 235 | &queue_iosched_entry.attr, |
206 | &queue_hw_sector_size_entry.attr, | 236 | &queue_hw_sector_size_entry.attr, |
207 | &queue_nomerges_entry.attr, | 237 | &queue_nomerges_entry.attr, |
238 | &queue_rq_affinity_entry.attr, | ||
208 | NULL, | 239 | NULL, |
209 | }; | 240 | }; |
210 | 241 | ||
@@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk) | |||
310 | if (!q->request_fn) | 341 | if (!q->request_fn) |
311 | return 0; | 342 | return 0; |
312 | 343 | ||
313 | ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), | 344 | ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), |
314 | "%s", "queue"); | 345 | "%s", "queue"); |
315 | if (ret < 0) | 346 | if (ret < 0) |
316 | return ret; | 347 | return ret; |
@@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk) | |||
339 | 370 | ||
340 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 371 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
341 | kobject_del(&q->kobj); | 372 | kobject_del(&q->kobj); |
342 | kobject_put(&disk->dev.kobj); | 373 | kobject_put(&disk_to_dev(disk)->kobj); |
343 | } | 374 | } |
344 | } | 375 | } |
diff --git a/block/blk-tag.c b/block/blk-tag.c index ed5166fbc599..c0d419e84ce7 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c | |||
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag); | |||
29 | * __blk_free_tags - release a given set of tag maintenance info | 29 | * __blk_free_tags - release a given set of tag maintenance info |
30 | * @bqt: the tag map to free | 30 | * @bqt: the tag map to free |
31 | * | 31 | * |
32 | * Tries to free the specified @bqt@. Returns true if it was | 32 | * Tries to free the specified @bqt. Returns true if it was |
33 | * actually freed and false if there are still references using it | 33 | * actually freed and false if there are still references using it |
34 | */ | 34 | */ |
35 | static int __blk_free_tags(struct blk_queue_tag *bqt) | 35 | static int __blk_free_tags(struct blk_queue_tag *bqt) |
@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q) | |||
78 | * blk_free_tags - release a given set of tag maintenance info | 78 | * blk_free_tags - release a given set of tag maintenance info |
79 | * @bqt: the tag map to free | 79 | * @bqt: the tag map to free |
80 | * | 80 | * |
81 | * For externally managed @bqt@ frees the map. Callers of this | 81 | * For externally managed @bqt frees the map. Callers of this |
82 | * function must guarantee to have released all the queues that | 82 | * function must guarantee to have released all the queues that |
83 | * might have been using this tag map. | 83 | * might have been using this tag map. |
84 | */ | 84 | */ |
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags); | |||
94 | * @q: the request queue for the device | 94 | * @q: the request queue for the device |
95 | * | 95 | * |
96 | * Notes: | 96 | * Notes: |
97 | * This is used to disabled tagged queuing to a device, yet leave | 97 | * This is used to disable tagged queuing to a device, yet leave |
98 | * queue in function. | 98 | * queue in function. |
99 | **/ | 99 | **/ |
100 | void blk_queue_free_tags(struct request_queue *q) | 100 | void blk_queue_free_tags(struct request_queue *q) |
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags); | |||
271 | * @rq: the request that has completed | 271 | * @rq: the request that has completed |
272 | * | 272 | * |
273 | * Description: | 273 | * Description: |
274 | * Typically called when end_that_request_first() returns 0, meaning | 274 | * Typically called when end_that_request_first() returns %0, meaning |
275 | * all transfers have been done for a request. It's important to call | 275 | * all transfers have been done for a request. It's important to call |
276 | * this function before end_that_request_last(), as that will put the | 276 | * this function before end_that_request_last(), as that will put the |
277 | * request back on the free list thus corrupting the internal tag list. | 277 | * request back on the free list thus corrupting the internal tag list. |
@@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag); | |||
337 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) | 337 | int blk_queue_start_tag(struct request_queue *q, struct request *rq) |
338 | { | 338 | { |
339 | struct blk_queue_tag *bqt = q->queue_tags; | 339 | struct blk_queue_tag *bqt = q->queue_tags; |
340 | unsigned max_depth, offset; | ||
340 | int tag; | 341 | int tag; |
341 | 342 | ||
342 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { | 343 | if (unlikely((rq->cmd_flags & REQ_QUEUED))) { |
@@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) | |||
350 | /* | 351 | /* |
351 | * Protect against shared tag maps, as we may not have exclusive | 352 | * Protect against shared tag maps, as we may not have exclusive |
352 | * access to the tag map. | 353 | * access to the tag map. |
354 | * | ||
355 | * We reserve a few tags just for sync IO, since we don't want | ||
356 | * to starve sync IO on behalf of flooding async IO. | ||
353 | */ | 357 | */ |
358 | max_depth = bqt->max_depth; | ||
359 | if (rq_is_sync(rq)) | ||
360 | offset = 0; | ||
361 | else | ||
362 | offset = max_depth >> 2; | ||
363 | |||
354 | do { | 364 | do { |
355 | tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); | 365 | tag = find_next_zero_bit(bqt->tag_map, max_depth, offset); |
356 | if (tag >= bqt->max_depth) | 366 | if (tag >= max_depth) |
357 | return 1; | 367 | return 1; |
358 | 368 | ||
359 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); | 369 | } while (test_and_set_bit_lock(tag, bqt->tag_map)); |
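The hunk above reserves the low quarter of the tag space for sync IO: async requests start their find_next_zero_bit() search at max_depth >> 2, while sync requests search from tag 0, so only sync IO can reach the low tags. A minimal sketch of the arithmetic (illustration only, not part of the patch):

	/* with a tag map of depth 32, async IO may only claim tags 8..31 */
	static unsigned tag_search_offset(int is_sync, unsigned max_depth)
	{
		return is_sync ? 0 : max_depth >> 2;	/* 32 >> 2 == 8 */
	}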
diff --git a/block/blk-timeout.c b/block/blk-timeout.c new file mode 100644 index 000000000000..972a63f848fb --- /dev/null +++ b/block/blk-timeout.c | |||
@@ -0,0 +1,238 @@ | |||
1 | /* | ||
2 | * Functions related to generic timeout handling of requests. | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/blkdev.h> | ||
7 | #include <linux/fault-inject.h> | ||
8 | |||
9 | #include "blk.h" | ||
10 | |||
11 | #ifdef CONFIG_FAIL_IO_TIMEOUT | ||
12 | |||
13 | static DECLARE_FAULT_ATTR(fail_io_timeout); | ||
14 | |||
15 | static int __init setup_fail_io_timeout(char *str) | ||
16 | { | ||
17 | return setup_fault_attr(&fail_io_timeout, str); | ||
18 | } | ||
19 | __setup("fail_io_timeout=", setup_fail_io_timeout); | ||
20 | |||
21 | int blk_should_fake_timeout(struct request_queue *q) | ||
22 | { | ||
23 | if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) | ||
24 | return 0; | ||
25 | |||
26 | return should_fail(&fail_io_timeout, 1); | ||
27 | } | ||
28 | |||
29 | static int __init fail_io_timeout_debugfs(void) | ||
30 | { | ||
31 | return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); | ||
32 | } | ||
33 | |||
34 | late_initcall(fail_io_timeout_debugfs); | ||
35 | |||
36 | ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr, | ||
37 | char *buf) | ||
38 | { | ||
39 | struct gendisk *disk = dev_to_disk(dev); | ||
40 | int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags); | ||
41 | |||
42 | return sprintf(buf, "%d\n", set != 0); | ||
43 | } | ||
44 | |||
45 | ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, | ||
46 | const char *buf, size_t count) | ||
47 | { | ||
48 | struct gendisk *disk = dev_to_disk(dev); | ||
49 | int val; | ||
50 | |||
51 | if (count) { | ||
52 | struct request_queue *q = disk->queue; | ||
53 | char *p = (char *) buf; | ||
54 | |||
55 | val = simple_strtoul(p, &p, 10); | ||
56 | spin_lock_irq(q->queue_lock); | ||
57 | if (val) | ||
58 | queue_flag_set(QUEUE_FLAG_FAIL_IO, q); | ||
59 | else | ||
60 | queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); | ||
61 | spin_unlock_irq(q->queue_lock); | ||
62 | } | ||
63 | |||
64 | return count; | ||
65 | } | ||
66 | |||
67 | #endif /* CONFIG_FAIL_IO_TIMEOUT */ | ||
68 | |||
69 | /* | ||
 70 | * blk_delete_timer - Delete/cancel timer for a given request. | ||
71 | * @req: request that we are canceling timer for | ||
72 | * | ||
73 | */ | ||
74 | void blk_delete_timer(struct request *req) | ||
75 | { | ||
76 | struct request_queue *q = req->q; | ||
77 | |||
78 | /* | ||
79 | * Nothing to detach | ||
80 | */ | ||
81 | if (!q->rq_timed_out_fn || !req->deadline) | ||
82 | return; | ||
83 | |||
84 | list_del_init(&req->timeout_list); | ||
85 | |||
86 | if (list_empty(&q->timeout_list)) | ||
87 | del_timer(&q->timeout); | ||
88 | } | ||
89 | |||
90 | static void blk_rq_timed_out(struct request *req) | ||
91 | { | ||
92 | struct request_queue *q = req->q; | ||
93 | enum blk_eh_timer_return ret; | ||
94 | |||
95 | ret = q->rq_timed_out_fn(req); | ||
96 | switch (ret) { | ||
97 | case BLK_EH_HANDLED: | ||
98 | __blk_complete_request(req); | ||
99 | break; | ||
100 | case BLK_EH_RESET_TIMER: | ||
101 | blk_clear_rq_complete(req); | ||
102 | blk_add_timer(req); | ||
103 | break; | ||
104 | case BLK_EH_NOT_HANDLED: | ||
105 | /* | ||
106 | * LLD handles this for now but in the future | ||
107 | * we can send a request msg to abort the command | ||
108 | * and we can move more of the generic scsi eh code to | ||
109 | * the blk layer. | ||
110 | */ | ||
111 | break; | ||
112 | default: | ||
113 | printk(KERN_ERR "block: bad eh return: %d\n", ret); | ||
114 | break; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | void blk_rq_timed_out_timer(unsigned long data) | ||
119 | { | ||
120 | struct request_queue *q = (struct request_queue *) data; | ||
121 | unsigned long flags, uninitialized_var(next), next_set = 0; | ||
122 | struct request *rq, *tmp; | ||
123 | |||
124 | spin_lock_irqsave(q->queue_lock, flags); | ||
125 | |||
126 | list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { | ||
127 | if (time_after_eq(jiffies, rq->deadline)) { | ||
128 | list_del_init(&rq->timeout_list); | ||
129 | |||
130 | /* | ||
131 | * Check if we raced with end io completion | ||
132 | */ | ||
133 | if (blk_mark_rq_complete(rq)) | ||
134 | continue; | ||
135 | blk_rq_timed_out(rq); | ||
136 | } | ||
137 | if (!next_set) { | ||
138 | next = rq->deadline; | ||
139 | next_set = 1; | ||
140 | } else if (time_after(next, rq->deadline)) | ||
141 | next = rq->deadline; | ||
142 | } | ||
143 | |||
144 | if (next_set && !list_empty(&q->timeout_list)) | ||
145 | mod_timer(&q->timeout, round_jiffies(next)); | ||
146 | |||
147 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
148 | } | ||
149 | |||
150 | /** | ||
151 | * blk_abort_request -- Request request recovery for the specified command | ||
152 | * @req: pointer to the request of interest | ||
153 | * | ||
154 | * This function requests that the block layer start recovery for the | ||
155 | * request by deleting the timer and calling the q's timeout function. | ||
156 | * LLDDs who implement their own error recovery MAY ignore the timeout | ||
157 | * event if they generated blk_abort_req. Must hold queue lock. | ||
158 | */ | ||
159 | void blk_abort_request(struct request *req) | ||
160 | { | ||
161 | if (blk_mark_rq_complete(req)) | ||
162 | return; | ||
163 | blk_delete_timer(req); | ||
164 | blk_rq_timed_out(req); | ||
165 | } | ||
166 | EXPORT_SYMBOL_GPL(blk_abort_request); | ||
167 | |||
168 | /** | ||
169 | * blk_add_timer - Start timeout timer for a single request | ||
170 | * @req: request that is about to start running. | ||
171 | * | ||
172 | * Notes: | ||
173 | * Each request has its own timer, and as it is added to the queue, we | ||
174 | * set up the timer. When the request completes, we cancel the timer. | ||
175 | */ | ||
176 | void blk_add_timer(struct request *req) | ||
177 | { | ||
178 | struct request_queue *q = req->q; | ||
179 | unsigned long expiry; | ||
180 | |||
181 | if (!q->rq_timed_out_fn) | ||
182 | return; | ||
183 | |||
184 | BUG_ON(!list_empty(&req->timeout_list)); | ||
185 | BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); | ||
186 | |||
187 | if (req->timeout) | ||
188 | req->deadline = jiffies + req->timeout; | ||
189 | else { | ||
190 | req->deadline = jiffies + q->rq_timeout; | ||
191 | /* | ||
192 | * Some LLDs, like scsi, peek at the timeout to prevent | ||
193 | * a command from being retried forever. | ||
194 | */ | ||
195 | req->timeout = q->rq_timeout; | ||
196 | } | ||
197 | list_add_tail(&req->timeout_list, &q->timeout_list); | ||
198 | |||
199 | /* | ||
200 | * If the timer isn't already pending or this timeout is earlier | ||
201 | * than an existing one, modify the timer. Round to next nearest | ||
202 | * second. | ||
203 | */ | ||
204 | expiry = round_jiffies(req->deadline); | ||
205 | |||
206 | /* | ||
207 | * We use ->deadline == 0 to detect whether a timer was added or | ||
208 | * not, so just increase to next jiffy for that specific case | ||
209 | */ | ||
210 | if (unlikely(!req->deadline)) | ||
211 | req->deadline = 1; | ||
212 | |||
213 | if (!timer_pending(&q->timeout) || | ||
214 | time_before(expiry, q->timeout.expires)) | ||
215 | mod_timer(&q->timeout, expiry); | ||
216 | } | ||
217 | |||
218 | /** | ||
219 | * blk_abort_queue -- Abort all requests on given queue | ||
220 | * @queue: pointer to queue | ||
221 | * | ||
222 | */ | ||
223 | void blk_abort_queue(struct request_queue *q) | ||
224 | { | ||
225 | unsigned long flags; | ||
226 | struct request *rq, *tmp; | ||
227 | |||
228 | spin_lock_irqsave(q->queue_lock, flags); | ||
229 | |||
230 | elv_abort_queue(q); | ||
231 | |||
232 | list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) | ||
233 | blk_abort_request(rq); | ||
234 | |||
235 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
236 | |||
237 | } | ||
238 | EXPORT_SYMBOL_GPL(blk_abort_queue); | ||
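Everything in blk-timeout.c is driven through q->rq_timed_out_fn and the three blk_eh_timer_return values handled in blk_rq_timed_out(). A sketch of what an LLD-side hook could look like (illustration only; the example_* helpers are hypothetical):

	static enum blk_eh_timer_return example_rq_timed_out(struct request *rq)
	{
		if (example_hw_completed(rq))		/* hypothetical helper */
			return BLK_EH_HANDLED;		/* block layer completes rq */
		if (example_hw_still_working(rq))	/* hypothetical helper */
			return BLK_EH_RESET_TIMER;	/* clear complete, re-arm timer */
		return BLK_EH_NOT_HANDLED;		/* leave recovery to the LLD */
	}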
diff --git a/block/blk.h b/block/blk.h index c79f30e1df52..e5c579769963 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q); | |||
17 | 17 | ||
18 | void blk_unplug_work(struct work_struct *work); | 18 | void blk_unplug_work(struct work_struct *work); |
19 | void blk_unplug_timeout(unsigned long data); | 19 | void blk_unplug_timeout(unsigned long data); |
20 | void blk_rq_timed_out_timer(unsigned long data); | ||
21 | void blk_delete_timer(struct request *); | ||
22 | void blk_add_timer(struct request *); | ||
23 | |||
24 | /* | ||
25 | * Internal atomic flags for request handling | ||
26 | */ | ||
27 | enum rq_atomic_flags { | ||
28 | REQ_ATOM_COMPLETE = 0, | ||
29 | }; | ||
30 | |||
31 | /* | ||
32 | * EH timer and IO completion will both attempt to 'grab' the request, make | ||
 33 | * sure that only one of them succeeds | ||
34 | */ | ||
35 | static inline int blk_mark_rq_complete(struct request *rq) | ||
36 | { | ||
37 | return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); | ||
38 | } | ||
39 | |||
40 | static inline void blk_clear_rq_complete(struct request *rq) | ||
41 | { | ||
42 | clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); | ||
43 | } | ||
44 | |||
45 | #ifdef CONFIG_FAIL_IO_TIMEOUT | ||
46 | int blk_should_fake_timeout(struct request_queue *); | ||
47 | ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); | ||
48 | ssize_t part_timeout_store(struct device *, struct device_attribute *, | ||
49 | const char *, size_t); | ||
50 | #else | ||
51 | static inline int blk_should_fake_timeout(struct request_queue *q) | ||
52 | { | ||
53 | return 0; | ||
54 | } | ||
55 | #endif | ||
20 | 56 | ||
21 | struct io_context *current_io_context(gfp_t gfp_flags, int node); | 57 | struct io_context *current_io_context(gfp_t gfp_flags, int node); |
22 | 58 | ||
@@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
59 | 95 | ||
60 | #endif /* BLK_DEV_INTEGRITY */ | 96 | #endif /* BLK_DEV_INTEGRITY */ |
61 | 97 | ||
98 | static inline int blk_cpu_to_group(int cpu) | ||
99 | { | ||
100 | #ifdef CONFIG_SCHED_MC | ||
101 | cpumask_t mask = cpu_coregroup_map(cpu); | ||
102 | return first_cpu(mask); | ||
103 | #elif defined(CONFIG_SCHED_SMT) | ||
104 | return first_cpu(per_cpu(cpu_sibling_map, cpu)); | ||
105 | #else | ||
106 | return cpu; | ||
107 | #endif | ||
108 | } | ||
109 | |||
62 | #endif | 110 | #endif |
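REQ_ATOM_COMPLETE is the arbitration point between the timeout path and normal completion: both are expected to call blk_mark_rq_complete(), and only the winner of the test_and_set_bit() may finish the request. A sketch of the idiom (illustration only):

	static void example_finish_request(struct request *rq)
	{
		if (blk_mark_rq_complete(rq))
			return;		/* the other path already owns rq */
		/* ... complete the request exactly once ... */
	}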
diff --git a/block/blktrace.c b/block/blktrace.c index eb9651ccb241..85049a7e7a17 100644 --- a/block/blktrace.c +++ b/block/blktrace.c | |||
@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
111 | */ | 111 | */ |
112 | static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; | 112 | static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; |
113 | 113 | ||
114 | /* | 114 | /* The ilog2() calls fall out because they're constant */ |
115 | * Bio action bits of interest | 115 | #define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ |
116 | */ | 116 | (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) |
117 | static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; | ||
118 | |||
119 | /* | ||
120 | * More could be added as needed, taking care to increment the decrementer | ||
121 | * to get correct indexing | ||
122 | */ | ||
123 | #define trace_barrier_bit(rw) \ | ||
124 | (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) | ||
125 | #define trace_sync_bit(rw) \ | ||
126 | (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) | ||
127 | #define trace_ahead_bit(rw) \ | ||
128 | (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) | ||
129 | #define trace_meta_bit(rw) \ | ||
130 | (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) | ||
131 | 117 | ||
132 | /* | 118 | /* |
133 | * The worker for the various blk_add_trace*() types. Fills out a | 119 | * The worker for the various blk_add_trace*() types. Fills out a |
@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
147 | return; | 133 | return; |
148 | 134 | ||
149 | what |= ddir_act[rw & WRITE]; | 135 | what |= ddir_act[rw & WRITE]; |
150 | what |= bio_act[trace_barrier_bit(rw)]; | 136 | what |= MASK_TC_BIT(rw, BARRIER); |
151 | what |= bio_act[trace_sync_bit(rw)]; | 137 | what |= MASK_TC_BIT(rw, SYNC); |
152 | what |= bio_act[trace_ahead_bit(rw)]; | 138 | what |= MASK_TC_BIT(rw, AHEAD); |
153 | what |= bio_act[trace_meta_bit(rw)]; | 139 | what |= MASK_TC_BIT(rw, META); |
140 | what |= MASK_TC_BIT(rw, DISCARD); | ||
154 | 141 | ||
155 | pid = tsk->pid; | 142 | pid = tsk->pid; |
156 | if (unlikely(act_log_check(bt, what, sector, pid))) | 143 | if (unlikely(act_log_check(bt, what, sector, pid))) |
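Taking BLK_TC_ACT(x) as x << BLK_TC_SHIFT, the new macro simply relocates one request flag to its trace-category position; for SYNC, for example:

	MASK_TC_BIT(rw, SYNC)
		= (rw & (1 << BIO_RW_SYNC)) << (ilog2(BLK_TC_SYNC) + BLK_TC_SHIFT - BIO_RW_SYNC)
		= BLK_TC_SYNC << BLK_TC_SHIFT = BLK_TC_ACT(BLK_TC_SYNC)    when BIO_RW_SYNC is set in rw
		= 0                                                         otherwise

which matches what the removed bio_act[] table lookups produced.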
@@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
382 | if (!buts->buf_size || !buts->buf_nr) | 369 | if (!buts->buf_size || !buts->buf_nr) |
383 | return -EINVAL; | 370 | return -EINVAL; |
384 | 371 | ||
385 | strcpy(buts->name, name); | 372 | strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); |
373 | buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; | ||
386 | 374 | ||
387 | /* | 375 | /* |
388 | * some device names have larger paths - convert the slashes | 376 | * some device names have larger paths - convert the slashes |
diff --git a/block/bsg.c b/block/bsg.c index 0aae8d7ba99c..56cb343c76d8 100644 --- a/block/bsg.c +++ b/block/bsg.c | |||
@@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) | |||
283 | next_rq->cmd_type = rq->cmd_type; | 283 | next_rq->cmd_type = rq->cmd_type; |
284 | 284 | ||
285 | dxferp = (void*)(unsigned long)hdr->din_xferp; | 285 | dxferp = (void*)(unsigned long)hdr->din_xferp; |
286 | ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); | 286 | ret = blk_rq_map_user(q, next_rq, NULL, dxferp, |
287 | hdr->din_xfer_len, GFP_KERNEL); | ||
287 | if (ret) | 288 | if (ret) |
288 | goto out; | 289 | goto out; |
289 | } | 290 | } |
@@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) | |||
298 | dxfer_len = 0; | 299 | dxfer_len = 0; |
299 | 300 | ||
300 | if (dxfer_len) { | 301 | if (dxfer_len) { |
301 | ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); | 302 | ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, |
303 | GFP_KERNEL); | ||
302 | if (ret) | 304 | if (ret) |
303 | goto out; | 305 | goto out; |
304 | } | 306 | } |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1e2aff812ee2..6a062eebbd15 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125; | |||
39 | #define CFQ_MIN_TT (2) | 39 | #define CFQ_MIN_TT (2) |
40 | 40 | ||
41 | #define CFQ_SLICE_SCALE (5) | 41 | #define CFQ_SLICE_SCALE (5) |
42 | #define CFQ_HW_QUEUE_MIN (5) | ||
42 | 43 | ||
43 | #define RQ_CIC(rq) \ | 44 | #define RQ_CIC(rq) \ |
44 | ((struct cfq_io_context *) (rq)->elevator_private) | 45 | ((struct cfq_io_context *) (rq)->elevator_private) |
@@ -86,7 +87,14 @@ struct cfq_data { | |||
86 | 87 | ||
87 | int rq_in_driver; | 88 | int rq_in_driver; |
88 | int sync_flight; | 89 | int sync_flight; |
90 | |||
91 | /* | ||
92 | * queue-depth detection | ||
93 | */ | ||
94 | int rq_queued; | ||
89 | int hw_tag; | 95 | int hw_tag; |
96 | int hw_tag_samples; | ||
97 | int rq_in_driver_peak; | ||
90 | 98 | ||
91 | /* | 99 | /* |
92 | * idle window management | 100 | * idle window management |
@@ -244,7 +252,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
244 | { | 252 | { |
245 | if (cfqd->busy_queues) { | 253 | if (cfqd->busy_queues) { |
246 | cfq_log(cfqd, "schedule dispatch"); | 254 | cfq_log(cfqd, "schedule dispatch"); |
247 | kblockd_schedule_work(&cfqd->unplug_work); | 255 | kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); |
248 | } | 256 | } |
249 | } | 257 | } |
250 | 258 | ||
@@ -654,15 +662,6 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) | |||
654 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", | 662 | cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", |
655 | cfqd->rq_in_driver); | 663 | cfqd->rq_in_driver); |
656 | 664 | ||
657 | /* | ||
658 | * If the depth is larger 1, it really could be queueing. But lets | ||
659 | * make the mark a little higher - idling could still be good for | ||
660 | * low queueing, and a low queueing number could also just indicate | ||
661 | * a SCSI mid layer like behaviour where limit+1 is often seen. | ||
662 | */ | ||
663 | if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) | ||
664 | cfqd->hw_tag = 1; | ||
665 | |||
666 | cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; | 665 | cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; |
667 | } | 666 | } |
668 | 667 | ||
@@ -686,6 +685,7 @@ static void cfq_remove_request(struct request *rq) | |||
686 | list_del_init(&rq->queuelist); | 685 | list_del_init(&rq->queuelist); |
687 | cfq_del_rq_rb(rq); | 686 | cfq_del_rq_rb(rq); |
688 | 687 | ||
688 | cfqq->cfqd->rq_queued--; | ||
689 | if (rq_is_meta(rq)) { | 689 | if (rq_is_meta(rq)) { |
690 | WARN_ON(!cfqq->meta_pending); | 690 | WARN_ON(!cfqq->meta_pending); |
691 | cfqq->meta_pending--; | 691 | cfqq->meta_pending--; |
@@ -878,6 +878,14 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) | |||
878 | struct cfq_io_context *cic; | 878 | struct cfq_io_context *cic; |
879 | unsigned long sl; | 879 | unsigned long sl; |
880 | 880 | ||
881 | /* | ||
882 | * SSD device without seek penalty, disable idling. But only do so | ||
883 | * for devices that support queuing, otherwise we still have a problem | ||
884 | * with sync vs async workloads. | ||
885 | */ | ||
886 | if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) | ||
887 | return; | ||
888 | |||
881 | WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); | 889 | WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); |
882 | WARN_ON(cfq_cfqq_slice_new(cfqq)); | 890 | WARN_ON(cfq_cfqq_slice_new(cfqq)); |
883 | 891 | ||
@@ -1833,6 +1841,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1833 | { | 1841 | { |
1834 | struct cfq_io_context *cic = RQ_CIC(rq); | 1842 | struct cfq_io_context *cic = RQ_CIC(rq); |
1835 | 1843 | ||
1844 | cfqd->rq_queued++; | ||
1836 | if (rq_is_meta(rq)) | 1845 | if (rq_is_meta(rq)) |
1837 | cfqq->meta_pending++; | 1846 | cfqq->meta_pending++; |
1838 | 1847 | ||
@@ -1880,6 +1889,31 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) | |||
1880 | cfq_rq_enqueued(cfqd, cfqq, rq); | 1889 | cfq_rq_enqueued(cfqd, cfqq, rq); |
1881 | } | 1890 | } |
1882 | 1891 | ||
1892 | /* | ||
1893 | * Update hw_tag based on peak queue depth over 50 samples under | ||
1894 | * sufficient load. | ||
1895 | */ | ||
1896 | static void cfq_update_hw_tag(struct cfq_data *cfqd) | ||
1897 | { | ||
1898 | if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) | ||
1899 | cfqd->rq_in_driver_peak = cfqd->rq_in_driver; | ||
1900 | |||
1901 | if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && | ||
1902 | cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) | ||
1903 | return; | ||
1904 | |||
1905 | if (cfqd->hw_tag_samples++ < 50) | ||
1906 | return; | ||
1907 | |||
1908 | if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN) | ||
1909 | cfqd->hw_tag = 1; | ||
1910 | else | ||
1911 | cfqd->hw_tag = 0; | ||
1912 | |||
1913 | cfqd->hw_tag_samples = 0; | ||
1914 | cfqd->rq_in_driver_peak = 0; | ||
1915 | } | ||
1916 | |||
1883 | static void cfq_completed_request(struct request_queue *q, struct request *rq) | 1917 | static void cfq_completed_request(struct request_queue *q, struct request *rq) |
1884 | { | 1918 | { |
1885 | struct cfq_queue *cfqq = RQ_CFQQ(rq); | 1919 | struct cfq_queue *cfqq = RQ_CFQQ(rq); |
@@ -1890,6 +1924,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) | |||
1890 | now = jiffies; | 1924 | now = jiffies; |
1891 | cfq_log_cfqq(cfqd, cfqq, "complete"); | 1925 | cfq_log_cfqq(cfqd, cfqq, "complete"); |
1892 | 1926 | ||
1927 | cfq_update_hw_tag(cfqd); | ||
1928 | |||
1893 | WARN_ON(!cfqd->rq_in_driver); | 1929 | WARN_ON(!cfqd->rq_in_driver); |
1894 | WARN_ON(!cfqq->dispatched); | 1930 | WARN_ON(!cfqq->dispatched); |
1895 | cfqd->rq_in_driver--; | 1931 | cfqd->rq_in_driver--; |
@@ -2200,6 +2236,7 @@ static void *cfq_init_queue(struct request_queue *q) | |||
2200 | cfqd->cfq_slice[1] = cfq_slice_sync; | 2236 | cfqd->cfq_slice[1] = cfq_slice_sync; |
2201 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; | 2237 | cfqd->cfq_slice_async_rq = cfq_slice_async_rq; |
2202 | cfqd->cfq_slice_idle = cfq_slice_idle; | 2238 | cfqd->cfq_slice_idle = cfq_slice_idle; |
2239 | cfqd->hw_tag = 1; | ||
2203 | 2240 | ||
2204 | return cfqd; | 2241 | return cfqd; |
2205 | } | 2242 | } |
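cfq_update_hw_tag() re-evaluates queuing support every 50 samples taken under sufficient load: if the peak driver depth in a window reached CFQ_HW_QUEUE_MIN, hw_tag stays set and idling remains disabled for non-rotational devices; otherwise it drops back to 0. Condensed decision, for illustration only:

	/* evaluated once per 50-sample window, starting from hw_tag = 1 */
	hw_tag = (rq_in_driver_peak >= CFQ_HW_QUEUE_MIN);	/* CFQ_HW_QUEUE_MIN == 5 */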
diff --git a/block/cmd-filter.c b/block/cmd-filter.c index 79c14996ac11..e669aed4c6bc 100644 --- a/block/cmd-filter.c +++ b/block/cmd-filter.c | |||
@@ -211,14 +211,10 @@ int blk_register_filter(struct gendisk *disk) | |||
211 | { | 211 | { |
212 | int ret; | 212 | int ret; |
213 | struct blk_cmd_filter *filter = &disk->queue->cmd_filter; | 213 | struct blk_cmd_filter *filter = &disk->queue->cmd_filter; |
214 | struct kobject *parent = kobject_get(disk->holder_dir->parent); | ||
215 | 214 | ||
216 | if (!parent) | 215 | ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, |
217 | return -ENODEV; | 216 | &disk_to_dev(disk)->kobj, |
218 | |||
219 | ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, | ||
220 | "%s", "cmd_filter"); | 217 | "%s", "cmd_filter"); |
221 | |||
222 | if (ret < 0) | 218 | if (ret < 0) |
223 | return ret; | 219 | return ret; |
224 | 220 | ||
@@ -231,7 +227,6 @@ void blk_unregister_filter(struct gendisk *disk) | |||
231 | struct blk_cmd_filter *filter = &disk->queue->cmd_filter; | 227 | struct blk_cmd_filter *filter = &disk->queue->cmd_filter; |
232 | 228 | ||
233 | kobject_put(&filter->kobj); | 229 | kobject_put(&filter->kobj); |
234 | kobject_put(disk->holder_dir->parent); | ||
235 | } | 230 | } |
236 | EXPORT_SYMBOL(blk_unregister_filter); | 231 | EXPORT_SYMBOL(blk_unregister_filter); |
237 | #endif | 232 | #endif |
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index c23177e4623f..1e559fba7bdf 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c | |||
@@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
788 | return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); | 788 | return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); |
789 | case BLKFLSBUF: | 789 | case BLKFLSBUF: |
790 | case BLKROSET: | 790 | case BLKROSET: |
791 | case BLKDISCARD: | ||
791 | /* | 792 | /* |
792 | * the ones below are implemented in blkdev_locked_ioctl, | 793 | * the ones below are implemented in blkdev_locked_ioctl, |
793 | * but we call blkdev_ioctl, which gets the lock for us | 794 | * but we call blkdev_ioctl, which gets the lock for us |
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 342448c3d2dd..fd311179f44c 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c | |||
@@ -33,7 +33,7 @@ struct deadline_data { | |||
33 | */ | 33 | */ |
34 | struct rb_root sort_list[2]; | 34 | struct rb_root sort_list[2]; |
35 | struct list_head fifo_list[2]; | 35 | struct list_head fifo_list[2]; |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * next in sort order. read, write or both are NULL | 38 | * next in sort order. read, write or both are NULL |
39 | */ | 39 | */ |
@@ -53,7 +53,11 @@ struct deadline_data { | |||
53 | 53 | ||
54 | static void deadline_move_request(struct deadline_data *, struct request *); | 54 | static void deadline_move_request(struct deadline_data *, struct request *); |
55 | 55 | ||
56 | #define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) | 56 | static inline struct rb_root * |
57 | deadline_rb_root(struct deadline_data *dd, struct request *rq) | ||
58 | { | ||
59 | return &dd->sort_list[rq_data_dir(rq)]; | ||
60 | } | ||
57 | 61 | ||
58 | /* | 62 | /* |
59 | * get the request after `rq' in sector-sorted order | 63 | * get the request after `rq' in sector-sorted order |
@@ -72,15 +76,11 @@ deadline_latter_request(struct request *rq) | |||
72 | static void | 76 | static void |
73 | deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) | 77 | deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) |
74 | { | 78 | { |
75 | struct rb_root *root = RQ_RB_ROOT(dd, rq); | 79 | struct rb_root *root = deadline_rb_root(dd, rq); |
76 | struct request *__alias; | 80 | struct request *__alias; |
77 | 81 | ||
78 | retry: | 82 | while (unlikely(__alias = elv_rb_add(root, rq))) |
79 | __alias = elv_rb_add(root, rq); | ||
80 | if (unlikely(__alias)) { | ||
81 | deadline_move_request(dd, __alias); | 83 | deadline_move_request(dd, __alias); |
82 | goto retry; | ||
83 | } | ||
84 | } | 84 | } |
85 | 85 | ||
86 | static inline void | 86 | static inline void |
@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) | |||
91 | if (dd->next_rq[data_dir] == rq) | 91 | if (dd->next_rq[data_dir] == rq) |
92 | dd->next_rq[data_dir] = deadline_latter_request(rq); | 92 | dd->next_rq[data_dir] = deadline_latter_request(rq); |
93 | 93 | ||
94 | elv_rb_del(RQ_RB_ROOT(dd, rq), rq); | 94 | elv_rb_del(deadline_rb_root(dd, rq), rq); |
95 | } | 95 | } |
96 | 96 | ||
97 | /* | 97 | /* |
@@ -106,7 +106,7 @@ deadline_add_request(struct request_queue *q, struct request *rq) | |||
106 | deadline_add_rq_rb(dd, rq); | 106 | deadline_add_rq_rb(dd, rq); |
107 | 107 | ||
108 | /* | 108 | /* |
109 | * set expire time (only used for reads) and add to fifo list | 109 | * set expire time and add to fifo list |
110 | */ | 110 | */ |
111 | rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); | 111 | rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); |
112 | list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); | 112 | list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); |
@@ -162,7 +162,7 @@ static void deadline_merged_request(struct request_queue *q, | |||
162 | * if the merge was a front merge, we need to reposition request | 162 | * if the merge was a front merge, we need to reposition request |
163 | */ | 163 | */ |
164 | if (type == ELEVATOR_FRONT_MERGE) { | 164 | if (type == ELEVATOR_FRONT_MERGE) { |
165 | elv_rb_del(RQ_RB_ROOT(dd, req), req); | 165 | elv_rb_del(deadline_rb_root(dd, req), req); |
166 | deadline_add_rq_rb(dd, req); | 166 | deadline_add_rq_rb(dd, req); |
167 | } | 167 | } |
168 | } | 168 | } |
@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq) | |||
212 | dd->next_rq[WRITE] = NULL; | 212 | dd->next_rq[WRITE] = NULL; |
213 | dd->next_rq[data_dir] = deadline_latter_request(rq); | 213 | dd->next_rq[data_dir] = deadline_latter_request(rq); |
214 | 214 | ||
215 | dd->last_sector = rq->sector + rq->nr_sectors; | 215 | dd->last_sector = rq_end_sector(rq); |
216 | 216 | ||
217 | /* | 217 | /* |
218 | * take it off the sort and fifo list, move | 218 | * take it off the sort and fifo list, move |
@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq) | |||
222 | } | 222 | } |
223 | 223 | ||
224 | /* | 224 | /* |
225 | * deadline_check_fifo returns 0 if there are no expired reads on the fifo, | 225 | * deadline_check_fifo returns 0 if there are no expired requests on the fifo, |
226 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) | 226 | * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) |
227 | */ | 227 | */ |
228 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) | 228 | static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) |
@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(struct request_queue *q, int force) | |||
258 | else | 258 | else |
259 | rq = dd->next_rq[READ]; | 259 | rq = dd->next_rq[READ]; |
260 | 260 | ||
261 | if (rq) { | 261 | if (rq && dd->batching < dd->fifo_batch) |
262 | /* we have a "next request" */ | 262 | /* we have a next request and are still entitled to batch */ |
263 | 263 | goto dispatch_request; | |
264 | if (dd->last_sector != rq->sector) | ||
265 | /* end the batch on a non sequential request */ | ||
266 | dd->batching += dd->fifo_batch; | ||
267 | |||
268 | if (dd->batching < dd->fifo_batch) | ||
269 | /* we are still entitled to batch */ | ||
270 | goto dispatch_request; | ||
271 | } | ||
272 | 264 | ||
273 | /* | 265 | /* |
274 | * at this point we are not running a batch. select the appropriate | 266 | * at this point we are not running a batch. select the appropriate |
diff --git a/block/elevator.c b/block/elevator.c index ed6f8f32d27e..04518921db31 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -34,8 +34,9 @@ | |||
34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
35 | #include <linux/blktrace_api.h> | 35 | #include <linux/blktrace_api.h> |
36 | #include <linux/hash.h> | 36 | #include <linux/hash.h> |
37 | #include <linux/uaccess.h> | ||
37 | 38 | ||
38 | #include <asm/uaccess.h> | 39 | #include "blk.h" |
39 | 40 | ||
40 | static DEFINE_SPINLOCK(elv_list_lock); | 41 | static DEFINE_SPINLOCK(elv_list_lock); |
41 | static LIST_HEAD(elv_list); | 42 | static LIST_HEAD(elv_list); |
@@ -75,6 +76,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
75 | return 0; | 76 | return 0; |
76 | 77 | ||
77 | /* | 78 | /* |
79 | * Don't merge file system requests and discard requests | ||
80 | */ | ||
81 | if (bio_discard(bio) != bio_discard(rq->bio)) | ||
82 | return 0; | ||
83 | |||
84 | /* | ||
78 | * different data direction or already started, don't merge | 85 | * different data direction or already started, don't merge |
79 | */ | 86 | */ |
80 | if (bio_data_dir(bio) != rq_data_dir(rq)) | 87 | if (bio_data_dir(bio) != rq_data_dir(rq)) |
@@ -438,6 +445,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) | |||
438 | list_for_each_prev(entry, &q->queue_head) { | 445 | list_for_each_prev(entry, &q->queue_head) { |
439 | struct request *pos = list_entry_rq(entry); | 446 | struct request *pos = list_entry_rq(entry); |
440 | 447 | ||
448 | if (blk_discard_rq(rq) != blk_discard_rq(pos)) | ||
449 | break; | ||
441 | if (rq_data_dir(rq) != rq_data_dir(pos)) | 450 | if (rq_data_dir(rq) != rq_data_dir(pos)) |
442 | break; | 451 | break; |
443 | if (pos->cmd_flags & stop_flags) | 452 | if (pos->cmd_flags & stop_flags) |
@@ -607,7 +616,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
607 | break; | 616 | break; |
608 | 617 | ||
609 | case ELEVATOR_INSERT_SORT: | 618 | case ELEVATOR_INSERT_SORT: |
610 | BUG_ON(!blk_fs_request(rq)); | 619 | BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); |
611 | rq->cmd_flags |= REQ_SORTED; | 620 | rq->cmd_flags |= REQ_SORTED; |
612 | q->nr_sorted++; | 621 | q->nr_sorted++; |
613 | if (rq_mergeable(rq)) { | 622 | if (rq_mergeable(rq)) { |
@@ -692,7 +701,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, | |||
692 | * this request is scheduling boundary, update | 701 | * this request is scheduling boundary, update |
693 | * end_sector | 702 | * end_sector |
694 | */ | 703 | */ |
695 | if (blk_fs_request(rq)) { | 704 | if (blk_fs_request(rq) || blk_discard_rq(rq)) { |
696 | q->end_sector = rq_end_sector(rq); | 705 | q->end_sector = rq_end_sector(rq); |
697 | q->boundary_rq = rq; | 706 | q->boundary_rq = rq; |
698 | } | 707 | } |
@@ -745,7 +754,7 @@ struct request *elv_next_request(struct request_queue *q) | |||
745 | * not ever see it. | 754 | * not ever see it. |
746 | */ | 755 | */ |
747 | if (blk_empty_barrier(rq)) { | 756 | if (blk_empty_barrier(rq)) { |
748 | end_queued_request(rq, 1); | 757 | __blk_end_request(rq, 0, blk_rq_bytes(rq)); |
749 | continue; | 758 | continue; |
750 | } | 759 | } |
751 | if (!(rq->cmd_flags & REQ_STARTED)) { | 760 | if (!(rq->cmd_flags & REQ_STARTED)) { |
@@ -764,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q) | |||
764 | */ | 773 | */ |
765 | rq->cmd_flags |= REQ_STARTED; | 774 | rq->cmd_flags |= REQ_STARTED; |
766 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); | 775 | blk_add_trace_rq(q, rq, BLK_TA_ISSUE); |
776 | |||
777 | /* | ||
778 | * We are now handing the request to the hardware, | ||
779 | * add the timeout handler | ||
780 | */ | ||
781 | blk_add_timer(rq); | ||
767 | } | 782 | } |
768 | 783 | ||
769 | if (!q->boundary_rq || q->boundary_rq == rq) { | 784 | if (!q->boundary_rq || q->boundary_rq == rq) { |
@@ -782,7 +797,6 @@ struct request *elv_next_request(struct request_queue *q) | |||
782 | * device can handle | 797 | * device can handle |
783 | */ | 798 | */ |
784 | rq->nr_phys_segments++; | 799 | rq->nr_phys_segments++; |
785 | rq->nr_hw_segments++; | ||
786 | } | 800 | } |
787 | 801 | ||
788 | if (!q->prep_rq_fn) | 802 | if (!q->prep_rq_fn) |
@@ -805,14 +819,13 @@ struct request *elv_next_request(struct request_queue *q) | |||
805 | * so that we don't add it again | 819 | * so that we don't add it again |
806 | */ | 820 | */ |
807 | --rq->nr_phys_segments; | 821 | --rq->nr_phys_segments; |
808 | --rq->nr_hw_segments; | ||
809 | } | 822 | } |
810 | 823 | ||
811 | rq = NULL; | 824 | rq = NULL; |
812 | break; | 825 | break; |
813 | } else if (ret == BLKPREP_KILL) { | 826 | } else if (ret == BLKPREP_KILL) { |
814 | rq->cmd_flags |= REQ_QUIET; | 827 | rq->cmd_flags |= REQ_QUIET; |
815 | end_queued_request(rq, 0); | 828 | __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); |
816 | } else { | 829 | } else { |
817 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); | 830 | printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); |
818 | break; | 831 | break; |
@@ -901,6 +914,19 @@ int elv_may_queue(struct request_queue *q, int rw) | |||
901 | return ELV_MQUEUE_MAY; | 914 | return ELV_MQUEUE_MAY; |
902 | } | 915 | } |
903 | 916 | ||
917 | void elv_abort_queue(struct request_queue *q) | ||
918 | { | ||
919 | struct request *rq; | ||
920 | |||
921 | while (!list_empty(&q->queue_head)) { | ||
922 | rq = list_entry_rq(q->queue_head.next); | ||
923 | rq->cmd_flags |= REQ_QUIET; | ||
924 | blk_add_trace_rq(q, rq, BLK_TA_ABORT); | ||
925 | __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); | ||
926 | } | ||
927 | } | ||
928 | EXPORT_SYMBOL(elv_abort_queue); | ||
929 | |||
904 | void elv_completed_request(struct request_queue *q, struct request *rq) | 930 | void elv_completed_request(struct request_queue *q, struct request *rq) |
905 | { | 931 | { |
906 | elevator_t *e = q->elevator; | 932 | elevator_t *e = q->elevator; |
diff --git a/block/genhd.c b/block/genhd.c index e0ce23ac2ece..4cd3433c99ac 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kobj_map.h> | 16 | #include <linux/kobj_map.h> |
17 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
18 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/idr.h> | ||
19 | 20 | ||
20 | #include "blk.h" | 21 | #include "blk.h" |
21 | 22 | ||
@@ -24,8 +25,194 @@ static DEFINE_MUTEX(block_class_lock); | |||
24 | struct kobject *block_depr; | 25 | struct kobject *block_depr; |
25 | #endif | 26 | #endif |
26 | 27 | ||
28 | /* for extended dynamic devt allocation, currently only one major is used */ | ||
29 | #define MAX_EXT_DEVT (1 << MINORBITS) | ||
30 | |||
31 | /* For extended devt allocation. ext_devt_mutex prevents look up | ||
32 | * results from going away underneath its user. | ||
33 | */ | ||
34 | static DEFINE_MUTEX(ext_devt_mutex); | ||
35 | static DEFINE_IDR(ext_devt_idr); | ||
36 | |||
27 | static struct device_type disk_type; | 37 | static struct device_type disk_type; |
28 | 38 | ||
39 | /** | ||
40 | * disk_get_part - get partition | ||
41 | * @disk: disk to look partition from | ||
42 | * @partno: partition number | ||
43 | * | ||
44 | * Look for partition @partno from @disk. If found, increment | ||
45 | * reference count and return it. | ||
46 | * | ||
47 | * CONTEXT: | ||
48 | * Don't care. | ||
49 | * | ||
50 | * RETURNS: | ||
51 | * Pointer to the found partition on success, NULL if not found. | ||
52 | */ | ||
53 | struct hd_struct *disk_get_part(struct gendisk *disk, int partno) | ||
54 | { | ||
55 | struct hd_struct *part = NULL; | ||
56 | struct disk_part_tbl *ptbl; | ||
57 | |||
58 | if (unlikely(partno < 0)) | ||
59 | return NULL; | ||
60 | |||
61 | rcu_read_lock(); | ||
62 | |||
63 | ptbl = rcu_dereference(disk->part_tbl); | ||
64 | if (likely(partno < ptbl->len)) { | ||
65 | part = rcu_dereference(ptbl->part[partno]); | ||
66 | if (part) | ||
67 | get_device(part_to_dev(part)); | ||
68 | } | ||
69 | |||
70 | rcu_read_unlock(); | ||
71 | |||
72 | return part; | ||
73 | } | ||
74 | EXPORT_SYMBOL_GPL(disk_get_part); | ||
75 | |||
76 | /** | ||
77 | * disk_part_iter_init - initialize partition iterator | ||
78 | * @piter: iterator to initialize | ||
79 | * @disk: disk to iterate over | ||
80 | * @flags: DISK_PITER_* flags | ||
81 | * | ||
82 | * Initialize @piter so that it iterates over partitions of @disk. | ||
83 | * | ||
84 | * CONTEXT: | ||
85 | * Don't care. | ||
86 | */ | ||
87 | void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, | ||
88 | unsigned int flags) | ||
89 | { | ||
90 | struct disk_part_tbl *ptbl; | ||
91 | |||
92 | rcu_read_lock(); | ||
93 | ptbl = rcu_dereference(disk->part_tbl); | ||
94 | |||
95 | piter->disk = disk; | ||
96 | piter->part = NULL; | ||
97 | |||
98 | if (flags & DISK_PITER_REVERSE) | ||
99 | piter->idx = ptbl->len - 1; | ||
100 | else if (flags & DISK_PITER_INCL_PART0) | ||
101 | piter->idx = 0; | ||
102 | else | ||
103 | piter->idx = 1; | ||
104 | |||
105 | piter->flags = flags; | ||
106 | |||
107 | rcu_read_unlock(); | ||
108 | } | ||
109 | EXPORT_SYMBOL_GPL(disk_part_iter_init); | ||
110 | |||
111 | /** | ||
112 | * disk_part_iter_next - proceed iterator to the next partition and return it | ||
113 | * @piter: iterator of interest | ||
114 | * | ||
115 | * Proceed @piter to the next partition and return it. | ||
116 | * | ||
117 | * CONTEXT: | ||
118 | * Don't care. | ||
119 | */ | ||
120 | struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) | ||
121 | { | ||
122 | struct disk_part_tbl *ptbl; | ||
123 | int inc, end; | ||
124 | |||
125 | /* put the last partition */ | ||
126 | disk_put_part(piter->part); | ||
127 | piter->part = NULL; | ||
128 | |||
129 | /* get part_tbl */ | ||
130 | rcu_read_lock(); | ||
131 | ptbl = rcu_dereference(piter->disk->part_tbl); | ||
132 | |||
133 | /* determine iteration parameters */ | ||
134 | if (piter->flags & DISK_PITER_REVERSE) { | ||
135 | inc = -1; | ||
136 | if (piter->flags & DISK_PITER_INCL_PART0) | ||
137 | end = -1; | ||
138 | else | ||
139 | end = 0; | ||
140 | } else { | ||
141 | inc = 1; | ||
142 | end = ptbl->len; | ||
143 | } | ||
144 | |||
145 | /* iterate to the next partition */ | ||
146 | for (; piter->idx != end; piter->idx += inc) { | ||
147 | struct hd_struct *part; | ||
148 | |||
149 | part = rcu_dereference(ptbl->part[piter->idx]); | ||
150 | if (!part) | ||
151 | continue; | ||
152 | if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) | ||
153 | continue; | ||
154 | |||
155 | get_device(part_to_dev(part)); | ||
156 | piter->part = part; | ||
157 | piter->idx += inc; | ||
158 | break; | ||
159 | } | ||
160 | |||
161 | rcu_read_unlock(); | ||
162 | |||
163 | return piter->part; | ||
164 | } | ||
165 | EXPORT_SYMBOL_GPL(disk_part_iter_next); | ||
166 | |||
167 | /** | ||
168 | * disk_part_iter_exit - finish up partition iteration | ||
169 | * @piter: iter of interest | ||
170 | * | ||
171 | * Called when iteration is over. Cleans up @piter. | ||
172 | * | ||
173 | * CONTEXT: | ||
174 | * Don't care. | ||
175 | */ | ||
176 | void disk_part_iter_exit(struct disk_part_iter *piter) | ||
177 | { | ||
178 | disk_put_part(piter->part); | ||
179 | piter->part = NULL; | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(disk_part_iter_exit); | ||
182 | |||
183 | /** | ||
184 | * disk_map_sector_rcu - map sector to partition | ||
185 | * @disk: gendisk of interest | ||
186 | * @sector: sector to map | ||
187 | * | ||
188 | * Find out which partition @sector maps to on @disk. This is | ||
189 | * primarily used for stats accounting. | ||
190 | * | ||
191 | * CONTEXT: | ||
192 | * RCU read locked. The returned partition pointer is valid only | ||
193 | * while preemption is disabled. | ||
194 | * | ||
195 | * RETURNS: | ||
196 | * Found partition on success, part0 is returned if no partition matches | ||
197 | */ | ||
198 | struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) | ||
199 | { | ||
200 | struct disk_part_tbl *ptbl; | ||
201 | int i; | ||
202 | |||
203 | ptbl = rcu_dereference(disk->part_tbl); | ||
204 | |||
205 | for (i = 1; i < ptbl->len; i++) { | ||
206 | struct hd_struct *part = rcu_dereference(ptbl->part[i]); | ||
207 | |||
208 | if (part && part->start_sect <= sector && | ||
209 | sector < part->start_sect + part->nr_sects) | ||
210 | return part; | ||
211 | } | ||
212 | return &disk->part0; | ||
213 | } | ||
214 | EXPORT_SYMBOL_GPL(disk_map_sector_rcu); | ||
215 | |||
29 | /* | 216 | /* |
30 | * Can be deleted altogether. Later. | 217 | * Can be deleted altogether. Later. |
31 | * | 218 | * |
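The iterator above pins each partition while the caller uses it and drops the reference on the next step; the expected calling pattern (the same one printk_all_partitions() below follows) is roughly:

	struct disk_part_iter piter;
	struct hd_struct *part;

	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter))) {
		/* part holds a device reference until the next iteration */
	}
	disk_part_iter_exit(&piter);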
@@ -43,14 +230,14 @@ static inline int major_to_index(int major) | |||
43 | } | 230 | } |
44 | 231 | ||
45 | #ifdef CONFIG_PROC_FS | 232 | #ifdef CONFIG_PROC_FS |
46 | void blkdev_show(struct seq_file *f, off_t offset) | 233 | void blkdev_show(struct seq_file *seqf, off_t offset) |
47 | { | 234 | { |
48 | struct blk_major_name *dp; | 235 | struct blk_major_name *dp; |
49 | 236 | ||
50 | if (offset < BLKDEV_MAJOR_HASH_SIZE) { | 237 | if (offset < BLKDEV_MAJOR_HASH_SIZE) { |
51 | mutex_lock(&block_class_lock); | 238 | mutex_lock(&block_class_lock); |
52 | for (dp = major_names[offset]; dp; dp = dp->next) | 239 | for (dp = major_names[offset]; dp; dp = dp->next) |
53 | seq_printf(f, "%3d %s\n", dp->major, dp->name); | 240 | seq_printf(seqf, "%3d %s\n", dp->major, dp->name); |
54 | mutex_unlock(&block_class_lock); | 241 | mutex_unlock(&block_class_lock); |
55 | } | 242 | } |
56 | } | 243 | } |
@@ -136,6 +323,118 @@ EXPORT_SYMBOL(unregister_blkdev); | |||
136 | 323 | ||
137 | static struct kobj_map *bdev_map; | 324 | static struct kobj_map *bdev_map; |
138 | 325 | ||
326 | /** | ||
327 | * blk_mangle_minor - scatter minor numbers apart | ||
328 | * @minor: minor number to mangle | ||
329 | * | ||
330 | * Scatter consecutively allocated @minor number apart if MANGLE_DEVT | ||
331 | * is enabled. Mangling twice gives the original value. | ||
332 | * | ||
333 | * RETURNS: | ||
334 | * Mangled value. | ||
335 | * | ||
336 | * CONTEXT: | ||
337 | * Don't care. | ||
338 | */ | ||
339 | static int blk_mangle_minor(int minor) | ||
340 | { | ||
341 | #ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT | ||
342 | int i; | ||
343 | |||
344 | for (i = 0; i < MINORBITS / 2; i++) { | ||
345 | int low = minor & (1 << i); | ||
346 | int high = minor & (1 << (MINORBITS - 1 - i)); | ||
347 | int distance = MINORBITS - 1 - 2 * i; | ||
348 | |||
349 | minor ^= low | high; /* clear both bits */ | ||
350 | low <<= distance; /* swap the positions */ | ||
351 | high >>= distance; | ||
352 | minor |= low | high; /* and set */ | ||
353 | } | ||
354 | #endif | ||
355 | return minor; | ||
356 | } | ||
357 | |||
358 | /** | ||
359 | * blk_alloc_devt - allocate a dev_t for a partition | ||
360 | * @part: partition to allocate dev_t for | ||
361 | * @gfp_mask: memory allocation flag | ||
362 | * @devt: out parameter for resulting dev_t | ||
363 | * | ||
364 | * Allocate a dev_t for block device. | ||
365 | * | ||
366 | * RETURNS: | ||
367 | * 0 on success, allocated dev_t is returned in *@devt. -errno on | ||
368 | * failure. | ||
369 | * | ||
370 | * CONTEXT: | ||
371 | * Might sleep. | ||
372 | */ | ||
373 | int blk_alloc_devt(struct hd_struct *part, dev_t *devt) | ||
374 | { | ||
375 | struct gendisk *disk = part_to_disk(part); | ||
376 | int idx, rc; | ||
377 | |||
378 | /* in consecutive minor range? */ | ||
379 | if (part->partno < disk->minors) { | ||
380 | *devt = MKDEV(disk->major, disk->first_minor + part->partno); | ||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | /* allocate ext devt */ | ||
385 | do { | ||
386 | if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) | ||
387 | return -ENOMEM; | ||
388 | rc = idr_get_new(&ext_devt_idr, part, &idx); | ||
389 | } while (rc == -EAGAIN); | ||
390 | |||
391 | if (rc) | ||
392 | return rc; | ||
393 | |||
394 | if (idx > MAX_EXT_DEVT) { | ||
395 | idr_remove(&ext_devt_idr, idx); | ||
396 | return -EBUSY; | ||
397 | } | ||
398 | |||
399 | *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * blk_free_devt - free a dev_t | ||
405 | * @devt: dev_t to free | ||
406 | * | ||
407 | * Free @devt which was allocated using blk_alloc_devt(). | ||
408 | * | ||
409 | * CONTEXT: | ||
410 | * Might sleep. | ||
411 | */ | ||
412 | void blk_free_devt(dev_t devt) | ||
413 | { | ||
414 | might_sleep(); | ||
415 | |||
416 | if (devt == MKDEV(0, 0)) | ||
417 | return; | ||
418 | |||
419 | if (MAJOR(devt) == BLOCK_EXT_MAJOR) { | ||
420 | mutex_lock(&ext_devt_mutex); | ||
421 | idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); | ||
422 | mutex_unlock(&ext_devt_mutex); | ||
423 | } | ||
424 | } | ||
425 | |||
426 | static char *bdevt_str(dev_t devt, char *buf) | ||
427 | { | ||
428 | if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { | ||
429 | char tbuf[BDEVT_SIZE]; | ||
430 | snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); | ||
431 | snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); | ||
432 | } else | ||
433 | snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); | ||
434 | |||
435 | return buf; | ||
436 | } | ||
437 | |||
139 | /* | 438 | /* |
140 | * Register device numbers dev..(dev+range-1) | 439 | * Register device numbers dev..(dev+range-1) |
141 | * range must be nonzero | 440 | * range must be nonzero |
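blk_mangle_minor() swaps bit i with bit (MINORBITS - 1 - i) for the low half of the minor bits, so applying it twice is a no-op, as the comment says. With the usual MINORBITS of 20, for example:

	blk_mangle_minor(0x00001) == 0x80000	/* bit 0 <-> bit 19 */
	blk_mangle_minor(0x80000) == 0x00001	/* mangling again restores it */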
@@ -157,11 +456,11 @@ void blk_unregister_region(dev_t devt, unsigned long range) | |||
157 | 456 | ||
158 | EXPORT_SYMBOL(blk_unregister_region); | 457 | EXPORT_SYMBOL(blk_unregister_region); |
159 | 458 | ||
160 | static struct kobject *exact_match(dev_t devt, int *part, void *data) | 459 | static struct kobject *exact_match(dev_t devt, int *partno, void *data) |
161 | { | 460 | { |
162 | struct gendisk *p = data; | 461 | struct gendisk *p = data; |
163 | 462 | ||
164 | return &p->dev.kobj; | 463 | return &disk_to_dev(p)->kobj; |
165 | } | 464 | } |
166 | 465 | ||
167 | static int exact_lock(dev_t devt, void *data) | 466 | static int exact_lock(dev_t devt, void *data) |
@@ -179,21 +478,46 @@ static int exact_lock(dev_t devt, void *data) | |||
179 | * | 478 | * |
180 | * This function registers the partitioning information in @disk | 479 | * This function registers the partitioning information in @disk |
181 | * with the kernel. | 480 | * with the kernel. |
481 | * | ||
482 | * FIXME: error handling | ||
182 | */ | 483 | */ |
183 | void add_disk(struct gendisk *disk) | 484 | void add_disk(struct gendisk *disk) |
184 | { | 485 | { |
185 | struct backing_dev_info *bdi; | 486 | struct backing_dev_info *bdi; |
487 | dev_t devt; | ||
186 | int retval; | 488 | int retval; |
187 | 489 | ||
490 | /* minors == 0 indicates to use ext devt from part0 and should | ||
491 | * be accompanied with EXT_DEVT flag. Make sure all | ||
492 | * parameters make sense. | ||
493 | */ | ||
494 | WARN_ON(disk->minors && !(disk->major || disk->first_minor)); | ||
495 | WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); | ||
496 | |||
188 | disk->flags |= GENHD_FL_UP; | 497 | disk->flags |= GENHD_FL_UP; |
189 | blk_register_region(MKDEV(disk->major, disk->first_minor), | 498 | |
190 | disk->minors, NULL, exact_match, exact_lock, disk); | 499 | retval = blk_alloc_devt(&disk->part0, &devt); |
500 | if (retval) { | ||
501 | WARN_ON(1); | ||
502 | return; | ||
503 | } | ||
504 | disk_to_dev(disk)->devt = devt; | ||
505 | |||
506 | /* ->major and ->first_minor aren't supposed to be | ||
507 | * dereferenced from here on, but set them just in case. | ||
508 | */ | ||
509 | disk->major = MAJOR(devt); | ||
510 | disk->first_minor = MINOR(devt); | ||
511 | |||
512 | blk_register_region(disk_devt(disk), disk->minors, NULL, | ||
513 | exact_match, exact_lock, disk); | ||
191 | register_disk(disk); | 514 | register_disk(disk); |
192 | blk_register_queue(disk); | 515 | blk_register_queue(disk); |
193 | 516 | ||
194 | bdi = &disk->queue->backing_dev_info; | 517 | bdi = &disk->queue->backing_dev_info; |
195 | bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); | 518 | bdi_register_dev(bdi, disk_devt(disk)); |
196 | retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); | 519 | retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, |
520 | "bdi"); | ||
197 | WARN_ON(retval); | 521 | WARN_ON(retval); |
198 | } | 522 | } |
199 | 523 | ||
@@ -202,78 +526,71 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ | |||
202 | 526 | ||
203 | void unlink_gendisk(struct gendisk *disk) | 527 | void unlink_gendisk(struct gendisk *disk) |
204 | { | 528 | { |
205 | sysfs_remove_link(&disk->dev.kobj, "bdi"); | 529 | sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); |
206 | bdi_unregister(&disk->queue->backing_dev_info); | 530 | bdi_unregister(&disk->queue->backing_dev_info); |
207 | blk_unregister_queue(disk); | 531 | blk_unregister_queue(disk); |
208 | blk_unregister_region(MKDEV(disk->major, disk->first_minor), | 532 | blk_unregister_region(disk_devt(disk), disk->minors); |
209 | disk->minors); | ||
210 | } | 533 | } |
211 | 534 | ||
212 | /** | 535 | /** |
213 | * get_gendisk - get partitioning information for a given device | 536 | * get_gendisk - get partitioning information for a given device |
214 | * @dev: device to get partitioning information for | 537 | * @devt: device to get partitioning information for |
538 | * @part: returned partition index | ||
215 | * | 539 | * |
216 | * This function gets the structure containing partitioning | 540 | * This function gets the structure containing partitioning |
217 | * information for the given device @dev. | 541 | * information for the given device @devt. |
218 | */ | 542 | */ |
219 | struct gendisk *get_gendisk(dev_t devt, int *part) | 543 | struct gendisk *get_gendisk(dev_t devt, int *partno) |
220 | { | 544 | { |
221 | struct kobject *kobj = kobj_lookup(bdev_map, devt, part); | 545 | struct gendisk *disk = NULL; |
222 | struct device *dev = kobj_to_dev(kobj); | 546 | |
547 | if (MAJOR(devt) != BLOCK_EXT_MAJOR) { | ||
548 | struct kobject *kobj; | ||
549 | |||
550 | kobj = kobj_lookup(bdev_map, devt, partno); | ||
551 | if (kobj) | ||
552 | disk = dev_to_disk(kobj_to_dev(kobj)); | ||
553 | } else { | ||
554 | struct hd_struct *part; | ||
223 | 555 | ||
224 | return kobj ? dev_to_disk(dev) : NULL; | 556 | mutex_lock(&ext_devt_mutex); |
557 | part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); | ||
558 | if (part && get_disk(part_to_disk(part))) { | ||
559 | *partno = part->partno; | ||
560 | disk = part_to_disk(part); | ||
561 | } | ||
562 | mutex_unlock(&ext_devt_mutex); | ||
563 | } | ||
564 | |||
565 | return disk; | ||
225 | } | 566 | } |
226 | 567 | ||
227 | /* | 568 | /** |
228 | * print a partitions - intended for places where the root filesystem can't be | 569 | * bdget_disk - do bdget() by gendisk and partition number |
229 | * mounted and thus to give the victim some idea of what went wrong | 570 | * @disk: gendisk of interest |
571 | * @partno: partition number | ||
572 | * | ||
573 | * Find partition @partno from @disk, do bdget() on it. | ||
574 | * | ||
575 | * CONTEXT: | ||
576 | * Don't care. | ||
577 | * | ||
578 | * RETURNS: | ||
579 | * Resulting block_device on success, NULL on failure. | ||
230 | */ | 580 | */ |
231 | static int printk_partition(struct device *dev, void *data) | 581 | struct block_device *bdget_disk(struct gendisk *disk, int partno) |
232 | { | 582 | { |
233 | struct gendisk *sgp; | 583 | struct hd_struct *part; |
234 | char buf[BDEVNAME_SIZE]; | 584 | struct block_device *bdev = NULL; |
235 | int n; | ||
236 | |||
237 | if (dev->type != &disk_type) | ||
238 | goto exit; | ||
239 | 585 | ||
240 | sgp = dev_to_disk(dev); | 586 | part = disk_get_part(disk, partno); |
241 | /* | 587 | if (part) |
242 | * Don't show empty devices or things that have been surpressed | 588 | bdev = bdget(part_devt(part)); |
243 | */ | 589 | disk_put_part(part); |
244 | if (get_capacity(sgp) == 0 || | ||
245 | (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) | ||
246 | goto exit; | ||
247 | 590 | ||
248 | /* | 591 | return bdev; |
249 | * Note, unlike /proc/partitions, I am showing the numbers in | ||
250 | * hex - the same format as the root= option takes. | ||
251 | */ | ||
252 | printk("%02x%02x %10llu %s", | ||
253 | sgp->major, sgp->first_minor, | ||
254 | (unsigned long long)get_capacity(sgp) >> 1, | ||
255 | disk_name(sgp, 0, buf)); | ||
256 | if (sgp->driverfs_dev != NULL && | ||
257 | sgp->driverfs_dev->driver != NULL) | ||
258 | printk(" driver: %s\n", | ||
259 | sgp->driverfs_dev->driver->name); | ||
260 | else | ||
261 | printk(" (driver?)\n"); | ||
262 | |||
263 | /* now show the partitions */ | ||
264 | for (n = 0; n < sgp->minors - 1; ++n) { | ||
265 | if (sgp->part[n] == NULL) | ||
266 | goto exit; | ||
267 | if (sgp->part[n]->nr_sects == 0) | ||
268 | goto exit; | ||
269 | printk(" %02x%02x %10llu %s\n", | ||
270 | sgp->major, n + 1 + sgp->first_minor, | ||
271 | (unsigned long long)sgp->part[n]->nr_sects >> 1, | ||
272 | disk_name(sgp, n + 1, buf)); | ||
273 | } | ||
274 | exit: | ||
275 | return 0; | ||
276 | } | 592 | } |
593 | EXPORT_SYMBOL(bdget_disk); | ||
277 | 594 | ||
278 | /* | 595 | /* |
279 | * print a full list of all partitions - intended for places where the root | 596 | * print a full list of all partitions - intended for places where the root |
@@ -282,120 +599,145 @@ exit: | |||
282 | */ | 599 | */ |
283 | void __init printk_all_partitions(void) | 600 | void __init printk_all_partitions(void) |
284 | { | 601 | { |
285 | mutex_lock(&block_class_lock); | 602 | struct class_dev_iter iter; |
286 | class_for_each_device(&block_class, NULL, NULL, printk_partition); | 603 | struct device *dev; |
287 | mutex_unlock(&block_class_lock); | 604 | |
605 | class_dev_iter_init(&iter, &block_class, NULL, &disk_type); | ||
606 | while ((dev = class_dev_iter_next(&iter))) { | ||
607 | struct gendisk *disk = dev_to_disk(dev); | ||
608 | struct disk_part_iter piter; | ||
609 | struct hd_struct *part; | ||
610 | char name_buf[BDEVNAME_SIZE]; | ||
611 | char devt_buf[BDEVT_SIZE]; | ||
612 | |||
613 | /* | ||
614 | * Don't show empty devices or things that have been | ||
615 | * surpressed | ||
616 | */ | ||
617 | if (get_capacity(disk) == 0 || | ||
618 | (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) | ||
619 | continue; | ||
620 | |||
621 | /* | ||
622 | * Note, unlike /proc/partitions, I am showing the | ||
623 | * numbers in hex - the same format as the root= | ||
624 | * option takes. | ||
625 | */ | ||
626 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); | ||
627 | while ((part = disk_part_iter_next(&piter))) { | ||
628 | bool is_part0 = part == &disk->part0; | ||
629 | |||
630 | printk("%s%s %10llu %s", is_part0 ? "" : " ", | ||
631 | bdevt_str(part_devt(part), devt_buf), | ||
632 | (unsigned long long)part->nr_sects >> 1, | ||
633 | disk_name(disk, part->partno, name_buf)); | ||
634 | if (is_part0) { | ||
635 | if (disk->driverfs_dev != NULL && | ||
636 | disk->driverfs_dev->driver != NULL) | ||
637 | printk(" driver: %s\n", | ||
638 | disk->driverfs_dev->driver->name); | ||
639 | else | ||
640 | printk(" (driver?)\n"); | ||
641 | } else | ||
642 | printk("\n"); | ||
643 | } | ||
644 | disk_part_iter_exit(&piter); | ||
645 | } | ||
646 | class_dev_iter_exit(&iter); | ||
288 | } | 647 | } |
289 | 648 | ||
290 | #ifdef CONFIG_PROC_FS | 649 | #ifdef CONFIG_PROC_FS |
291 | /* iterator */ | 650 | /* iterator */ |
292 | static int find_start(struct device *dev, void *data) | 651 | static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) |
293 | { | 652 | { |
294 | loff_t *k = data; | 653 | loff_t skip = *pos; |
654 | struct class_dev_iter *iter; | ||
655 | struct device *dev; | ||
295 | 656 | ||
296 | if (dev->type != &disk_type) | 657 | iter = kmalloc(sizeof(*iter), GFP_KERNEL); |
297 | return 0; | 658 | if (!iter) |
298 | if (!*k) | 659 | return ERR_PTR(-ENOMEM); |
299 | return 1; | 660 | |
300 | (*k)--; | 661 | seqf->private = iter; |
301 | return 0; | 662 | class_dev_iter_init(iter, &block_class, NULL, &disk_type); |
663 | do { | ||
664 | dev = class_dev_iter_next(iter); | ||
665 | if (!dev) | ||
666 | return NULL; | ||
667 | } while (skip--); | ||
668 | |||
669 | return dev_to_disk(dev); | ||
302 | } | 670 | } |
303 | 671 | ||
304 | static void *part_start(struct seq_file *part, loff_t *pos) | 672 | static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) |
305 | { | 673 | { |
306 | struct device *dev; | 674 | struct device *dev; |
307 | loff_t k = *pos; | ||
308 | |||
309 | if (!k) | ||
310 | part->private = (void *)1LU; /* tell show to print header */ | ||
311 | 675 | ||
312 | mutex_lock(&block_class_lock); | 676 | (*pos)++; |
313 | dev = class_find_device(&block_class, NULL, &k, find_start); | 677 | dev = class_dev_iter_next(seqf->private); |
314 | if (dev) { | 678 | if (dev) |
315 | put_device(dev); | ||
316 | return dev_to_disk(dev); | 679 | return dev_to_disk(dev); |
317 | } | 680 | |
318 | return NULL; | 681 | return NULL; |
319 | } | 682 | } |
320 | 683 | ||
321 | static int find_next(struct device *dev, void *data) | 684 | static void disk_seqf_stop(struct seq_file *seqf, void *v) |
322 | { | 685 | { |
323 | if (dev->type == &disk_type) | 686 | struct class_dev_iter *iter = seqf->private; |
324 | return 1; | ||
325 | return 0; | ||
326 | } | ||
327 | 687 | ||
328 | static void *part_next(struct seq_file *part, void *v, loff_t *pos) | 688 | /* stop is called even after start failed :-( */ |
329 | { | 689 | if (iter) { |
330 | struct gendisk *gp = v; | 690 | class_dev_iter_exit(iter); |
331 | struct device *dev; | 691 | kfree(iter); |
332 | ++*pos; | ||
333 | dev = class_find_device(&block_class, &gp->dev, NULL, find_next); | ||
334 | if (dev) { | ||
335 | put_device(dev); | ||
336 | return dev_to_disk(dev); | ||
337 | } | 692 | } |
338 | return NULL; | ||
339 | } | 693 | } |
340 | 694 | ||
341 | static void part_stop(struct seq_file *part, void *v) | 695 | static void *show_partition_start(struct seq_file *seqf, loff_t *pos) |
342 | { | 696 | { |
343 | mutex_unlock(&block_class_lock); | 697 | static void *p; |
698 | |||
699 | p = disk_seqf_start(seqf, pos); | ||
700 | if (!IS_ERR(p) && p && !*pos) | ||
701 | seq_puts(seqf, "major minor #blocks name\n\n"); | ||
702 | return p; | ||
344 | } | 703 | } |
345 | 704 | ||
346 | static int show_partition(struct seq_file *part, void *v) | 705 | static int show_partition(struct seq_file *seqf, void *v) |
347 | { | 706 | { |
348 | struct gendisk *sgp = v; | 707 | struct gendisk *sgp = v; |
349 | int n; | 708 | struct disk_part_iter piter; |
709 | struct hd_struct *part; | ||
350 | char buf[BDEVNAME_SIZE]; | 710 | char buf[BDEVNAME_SIZE]; |
351 | 711 | ||
352 | /* | ||
353 | * Print header if start told us to do. This is to preserve | ||
354 | * the original behavior of not printing header if no | ||
355 | * partition exists. This hackery will be removed later with | ||
356 | * class iteration clean up. | ||
357 | */ | ||
358 | if (part->private) { | ||
359 | seq_puts(part, "major minor #blocks name\n\n"); | ||
360 | part->private = NULL; | ||
361 | } | ||
362 | |||
363 | /* Don't show non-partitionable removable devices or empty devices */ | 712 |
364 | if (!get_capacity(sgp) || | 713 | if (!get_capacity(sgp) || (!disk_partitionable(sgp) && |
365 | (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) | 714 | (sgp->flags & GENHD_FL_REMOVABLE))) |
366 | return 0; | 715 | return 0; |
367 | if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) | 716 | if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) |
368 | return 0; | 717 | return 0; |
369 | 718 | ||
370 | /* show the full disk and all non-0 size partitions of it */ | 719 | /* show the full disk and all non-0 size partitions of it */ |
371 | seq_printf(part, "%4d %4d %10llu %s\n", | 720 | disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); |
372 | sgp->major, sgp->first_minor, | 721 | while ((part = disk_part_iter_next(&piter))) |
373 | (unsigned long long)get_capacity(sgp) >> 1, | 722 | seq_printf(seqf, "%4d %7d %10llu %s\n", |
374 | disk_name(sgp, 0, buf)); | 723 | MAJOR(part_devt(part)), MINOR(part_devt(part)), |
375 | for (n = 0; n < sgp->minors - 1; n++) { | 724 | (unsigned long long)part->nr_sects >> 1, |
376 | if (!sgp->part[n]) | 725 | disk_name(sgp, part->partno, buf)); |
377 | continue; | 726 | disk_part_iter_exit(&piter); |
378 | if (sgp->part[n]->nr_sects == 0) | ||
379 | continue; | ||
380 | seq_printf(part, "%4d %4d %10llu %s\n", | ||
381 | sgp->major, n + 1 + sgp->first_minor, | ||
382 | (unsigned long long)sgp->part[n]->nr_sects >> 1 , | ||
383 | disk_name(sgp, n + 1, buf)); | ||
384 | } | ||
385 | 727 | ||
386 | return 0; | 728 | return 0; |
387 | } | 729 | } |
388 | 730 | ||
389 | const struct seq_operations partitions_op = { | 731 | const struct seq_operations partitions_op = { |
390 | .start = part_start, | 732 | .start = show_partition_start, |
391 | .next = part_next, | 733 | .next = disk_seqf_next, |
392 | .stop = part_stop, | 734 | .stop = disk_seqf_stop, |
393 | .show = show_partition | 735 | .show = show_partition |
394 | }; | 736 | }; |
395 | #endif | 737 | #endif |
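The disk_seqf_* callbacks keep their class_dev_iter in seqf->private, so the seq_operations tables above drop into the usual /proc plumbing unchanged. A sketch of that plumbing, assuming the seq_file/proc API of this kernel (the open helper and fops names are illustrative; the real hookup lives in fs/proc/, not here):

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int example_partitions_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &partitions_op);
}

static const struct file_operations example_partitions_fops = {
        .open    = example_partitions_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release,  /* disk_seqf_stop() already freed ->private */
};

/* registered with e.g. proc_create("partitions", 0, NULL,
 *                                  &example_partitions_fops); */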
396 | 738 | ||
397 | 739 | ||
398 | static struct kobject *base_probe(dev_t devt, int *part, void *data) | 740 | static struct kobject *base_probe(dev_t devt, int *partno, void *data) |
399 | { | 741 | { |
400 | if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) | 742 | if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) |
401 | /* Make old-style 2.4 aliases work */ | 743 | /* Make old-style 2.4 aliases work */ |
@@ -431,29 +773,29 @@ static ssize_t disk_range_show(struct device *dev, | |||
431 | return sprintf(buf, "%d\n", disk->minors); | 773 | return sprintf(buf, "%d\n", disk->minors); |
432 | } | 774 | } |
433 | 775 | ||
434 | static ssize_t disk_removable_show(struct device *dev, | 776 | static ssize_t disk_ext_range_show(struct device *dev, |
435 | struct device_attribute *attr, char *buf) | 777 | struct device_attribute *attr, char *buf) |
436 | { | 778 | { |
437 | struct gendisk *disk = dev_to_disk(dev); | 779 | struct gendisk *disk = dev_to_disk(dev); |
438 | 780 | ||
439 | return sprintf(buf, "%d\n", | 781 | return sprintf(buf, "%d\n", disk_max_parts(disk)); |
440 | (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); | ||
441 | } | 782 | } |
442 | 783 | ||
443 | static ssize_t disk_ro_show(struct device *dev, | 784 | static ssize_t disk_removable_show(struct device *dev, |
444 | struct device_attribute *attr, char *buf) | 785 | struct device_attribute *attr, char *buf) |
445 | { | 786 | { |
446 | struct gendisk *disk = dev_to_disk(dev); | 787 | struct gendisk *disk = dev_to_disk(dev); |
447 | 788 | ||
448 | return sprintf(buf, "%d\n", disk->policy ? 1 : 0); | 789 | return sprintf(buf, "%d\n", |
790 | (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); | ||
449 | } | 791 | } |
450 | 792 | ||
451 | static ssize_t disk_size_show(struct device *dev, | 793 | static ssize_t disk_ro_show(struct device *dev, |
452 | struct device_attribute *attr, char *buf) | 794 | struct device_attribute *attr, char *buf) |
453 | { | 795 | { |
454 | struct gendisk *disk = dev_to_disk(dev); | 796 | struct gendisk *disk = dev_to_disk(dev); |
455 | 797 | ||
456 | return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk)); | 798 | return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); |
457 | } | 799 | } |
458 | 800 | ||
459 | static ssize_t disk_capability_show(struct device *dev, | 801 | static ssize_t disk_capability_show(struct device *dev, |
@@ -464,73 +806,26 @@ static ssize_t disk_capability_show(struct device *dev, | |||
464 | return sprintf(buf, "%x\n", disk->flags); | 806 | return sprintf(buf, "%x\n", disk->flags); |
465 | } | 807 | } |
466 | 808 | ||
467 | static ssize_t disk_stat_show(struct device *dev, | ||
468 | struct device_attribute *attr, char *buf) | ||
469 | { | ||
470 | struct gendisk *disk = dev_to_disk(dev); | ||
471 | |||
472 | preempt_disable(); | ||
473 | disk_round_stats(disk); | ||
474 | preempt_enable(); | ||
475 | return sprintf(buf, | ||
476 | "%8lu %8lu %8llu %8u " | ||
477 | "%8lu %8lu %8llu %8u " | ||
478 | "%8u %8u %8u" | ||
479 | "\n", | ||
480 | disk_stat_read(disk, ios[READ]), | ||
481 | disk_stat_read(disk, merges[READ]), | ||
482 | (unsigned long long)disk_stat_read(disk, sectors[READ]), | ||
483 | jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), | ||
484 | disk_stat_read(disk, ios[WRITE]), | ||
485 | disk_stat_read(disk, merges[WRITE]), | ||
486 | (unsigned long long)disk_stat_read(disk, sectors[WRITE]), | ||
487 | jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), | ||
488 | disk->in_flight, | ||
489 | jiffies_to_msecs(disk_stat_read(disk, io_ticks)), | ||
490 | jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); | ||
491 | } | ||
492 | |||
493 | #ifdef CONFIG_FAIL_MAKE_REQUEST | ||
494 | static ssize_t disk_fail_show(struct device *dev, | ||
495 | struct device_attribute *attr, char *buf) | ||
496 | { | ||
497 | struct gendisk *disk = dev_to_disk(dev); | ||
498 | |||
499 | return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0); | ||
500 | } | ||
501 | |||
502 | static ssize_t disk_fail_store(struct device *dev, | ||
503 | struct device_attribute *attr, | ||
504 | const char *buf, size_t count) | ||
505 | { | ||
506 | struct gendisk *disk = dev_to_disk(dev); | ||
507 | int i; | ||
508 | |||
509 | if (count > 0 && sscanf(buf, "%d", &i) > 0) { | ||
510 | if (i == 0) | ||
511 | disk->flags &= ~GENHD_FL_FAIL; | ||
512 | else | ||
513 | disk->flags |= GENHD_FL_FAIL; | ||
514 | } | ||
515 | |||
516 | return count; | ||
517 | } | ||
518 | |||
519 | #endif | ||
520 | |||
521 | static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); | 809 | static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); |
810 | static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); | ||
522 | static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); | 811 | static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); |
523 | static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); | 812 | static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); |
524 | static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); | 813 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
525 | static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); | 814 | static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); |
526 | static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); | 815 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
527 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 816 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
528 | static struct device_attribute dev_attr_fail = | 817 | static struct device_attribute dev_attr_fail = |
529 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store); | 818 | __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); |
819 | #endif | ||
820 | #ifdef CONFIG_FAIL_IO_TIMEOUT | ||
821 | static struct device_attribute dev_attr_fail_timeout = | ||
822 | __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, | ||
823 | part_timeout_store); | ||
530 | #endif | 824 | #endif |
531 | 825 | ||
532 | static struct attribute *disk_attrs[] = { | 826 | static struct attribute *disk_attrs[] = { |
533 | &dev_attr_range.attr, | 827 | &dev_attr_range.attr, |
828 | &dev_attr_ext_range.attr, | ||
534 | &dev_attr_removable.attr, | 829 | &dev_attr_removable.attr, |
535 | &dev_attr_ro.attr, | 830 | &dev_attr_ro.attr, |
536 | &dev_attr_size.attr, | 831 | &dev_attr_size.attr, |
@@ -539,6 +834,9 @@ static struct attribute *disk_attrs[] = { | |||
539 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 834 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
540 | &dev_attr_fail.attr, | 835 | &dev_attr_fail.attr, |
541 | #endif | 836 | #endif |
837 | #ifdef CONFIG_FAIL_IO_TIMEOUT | ||
838 | &dev_attr_fail_timeout.attr, | ||
839 | #endif | ||
542 | NULL | 840 | NULL |
543 | }; | 841 | }; |
544 | 842 | ||
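The new ext_range attribute sits next to range under /sys/block/<disk>/ and reports disk_max_parts() rather than the classic minor span. A quick illustrative userspace check (the device name "sda" and the paths are assumptions, not part of this patch):

#include <stdio.h>

int main(void)
{
        const char *attrs[] = { "range", "ext_range" };
        char path[64];
        int i;

        for (i = 0; i < 2; i++) {
                FILE *f;
                int val;

                snprintf(path, sizeof(path), "/sys/block/sda/%s", attrs[i]);
                f = fopen(path, "r");
                if (!f)
                        continue;
                if (fscanf(f, "%d", &val) == 1)
                        printf("%s = %d\n", attrs[i], val);
                fclose(f);
        }
        return 0;
}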
@@ -551,13 +849,87 @@ static struct attribute_group *disk_attr_groups[] = { | |||
551 | NULL | 849 | NULL |
552 | }; | 850 | }; |
553 | 851 | ||
852 | static void disk_free_ptbl_rcu_cb(struct rcu_head *head) | ||
853 | { | ||
854 | struct disk_part_tbl *ptbl = | ||
855 | container_of(head, struct disk_part_tbl, rcu_head); | ||
856 | |||
857 | kfree(ptbl); | ||
858 | } | ||
859 | |||
860 | /** | ||
861 | * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way | ||
862 | * @disk: disk to replace part_tbl for | ||
863 | * @new_ptbl: new part_tbl to install | ||
864 | * | ||
865 | * Replace disk->part_tbl with @new_ptbl in an RCU-safe way. The | ||
866 | * original ptbl is freed using an RCU callback. | ||
867 | * | ||
868 | * LOCKING: | ||
869 | * Matching bd_mutex locked. | ||
870 | */ | ||
871 | static void disk_replace_part_tbl(struct gendisk *disk, | ||
872 | struct disk_part_tbl *new_ptbl) | ||
873 | { | ||
874 | struct disk_part_tbl *old_ptbl = disk->part_tbl; | ||
875 | |||
876 | rcu_assign_pointer(disk->part_tbl, new_ptbl); | ||
877 | if (old_ptbl) | ||
878 | call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); | ||
879 | } | ||
880 | |||
881 | /** | ||
882 | * disk_expand_part_tbl - expand disk->part_tbl | ||
883 | * @disk: disk to expand part_tbl for | ||
884 | * @partno: expand such that this partno can fit in | ||
885 | * | ||
886 | * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl | ||
887 | * uses RCU to allow unlocked dereferencing for stats and other stuff. | ||
888 | * | ||
889 | * LOCKING: | ||
890 | * Matching bd_mutex locked, might sleep. | ||
891 | * | ||
892 | * RETURNS: | ||
893 | * 0 on success, -errno on failure. | ||
894 | */ | ||
895 | int disk_expand_part_tbl(struct gendisk *disk, int partno) | ||
896 | { | ||
897 | struct disk_part_tbl *old_ptbl = disk->part_tbl; | ||
898 | struct disk_part_tbl *new_ptbl; | ||
899 | int len = old_ptbl ? old_ptbl->len : 0; | ||
900 | int target = partno + 1; | ||
901 | size_t size; | ||
902 | int i; | ||
903 | |||
904 | /* disk_max_parts() is zero during initialization, ignore if so */ | ||
905 | if (disk_max_parts(disk) && target > disk_max_parts(disk)) | ||
906 | return -EINVAL; | ||
907 | |||
908 | if (target <= len) | ||
909 | return 0; | ||
910 | |||
911 | size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); | ||
912 | new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); | ||
913 | if (!new_ptbl) | ||
914 | return -ENOMEM; | ||
915 | |||
916 | INIT_RCU_HEAD(&new_ptbl->rcu_head); | ||
917 | new_ptbl->len = target; | ||
918 | |||
919 | for (i = 0; i < len; i++) | ||
920 | rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); | ||
921 | |||
922 | disk_replace_part_tbl(disk, new_ptbl); | ||
923 | return 0; | ||
924 | } | ||
925 | |||
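disk_replace_part_tbl() and disk_expand_part_tbl() are the writer side of the scheme; the point of using RCU here is that lookups may dereference the table without holding bd_mutex. A reader-side sketch under that assumption (the helper name is illustrative; the real lookup helpers are added elsewhere in this series):

#include <linux/genhd.h>
#include <linux/rcupdate.h>

/* Lockless read of a partition's size through the RCU-managed table. */
static sector_t example_part_size(struct gendisk *disk, int partno)
{
        struct disk_part_tbl *ptbl;
        struct hd_struct *part;
        sector_t nr_sects = 0;

        rcu_read_lock();
        ptbl = rcu_dereference(disk->part_tbl);
        if (partno >= 0 && partno < ptbl->len) {
                part = rcu_dereference(ptbl->part[partno]);
                if (part)
                        nr_sects = part->nr_sects;
        }
        rcu_read_unlock();

        return nr_sects;
}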
554 | static void disk_release(struct device *dev) | 926 | static void disk_release(struct device *dev) |
555 | { | 927 | { |
556 | struct gendisk *disk = dev_to_disk(dev); | 928 | struct gendisk *disk = dev_to_disk(dev); |
557 | 929 | ||
558 | kfree(disk->random); | 930 | kfree(disk->random); |
559 | kfree(disk->part); | 931 | disk_replace_part_tbl(disk, NULL); |
560 | free_disk_stats(disk); | 932 | free_part_stats(&disk->part0); |
561 | kfree(disk); | 933 | kfree(disk); |
562 | } | 934 | } |
563 | struct class block_class = { | 935 | struct class block_class = { |
@@ -578,83 +950,31 @@ static struct device_type disk_type = { | |||
578 | * The output looks suspiciously like /proc/partitions with a bunch of | 950 | * The output looks suspiciously like /proc/partitions with a bunch of |
579 | * extra fields. | 951 | * extra fields. |
580 | */ | 952 | */ |
581 | 953 | static int diskstats_show(struct seq_file *seqf, void *v) | |
582 | static void *diskstats_start(struct seq_file *part, loff_t *pos) | ||
583 | { | ||
584 | struct device *dev; | ||
585 | loff_t k = *pos; | ||
586 | |||
587 | mutex_lock(&block_class_lock); | ||
588 | dev = class_find_device(&block_class, NULL, &k, find_start); | ||
589 | if (dev) { | ||
590 | put_device(dev); | ||
591 | return dev_to_disk(dev); | ||
592 | } | ||
593 | return NULL; | ||
594 | } | ||
595 | |||
596 | static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) | ||
597 | { | ||
598 | struct gendisk *gp = v; | ||
599 | struct device *dev; | ||
600 | |||
601 | ++*pos; | ||
602 | dev = class_find_device(&block_class, &gp->dev, NULL, find_next); | ||
603 | if (dev) { | ||
604 | put_device(dev); | ||
605 | return dev_to_disk(dev); | ||
606 | } | ||
607 | return NULL; | ||
608 | } | ||
609 | |||
610 | static void diskstats_stop(struct seq_file *part, void *v) | ||
611 | { | ||
612 | mutex_unlock(&block_class_lock); | ||
613 | } | ||
614 | |||
615 | static int diskstats_show(struct seq_file *s, void *v) | ||
616 | { | 954 | { |
617 | struct gendisk *gp = v; | 955 | struct gendisk *gp = v; |
956 | struct disk_part_iter piter; | ||
957 | struct hd_struct *hd; | ||
618 | char buf[BDEVNAME_SIZE]; | 958 | char buf[BDEVNAME_SIZE]; |
619 | int n = 0; | 959 | int cpu; |
620 | 960 | ||
621 | /* | 961 | /* |
622 | if (&gp->dev.kobj.entry == block_class.devices.next) | 962 | if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) |
623 | seq_puts(s, "major minor name" | 963 | seq_puts(seqf, "major minor name" |
624 | " rio rmerge rsect ruse wio wmerge " | 964 | " rio rmerge rsect ruse wio wmerge " |
625 | "wsect wuse running use aveq" | 965 | "wsect wuse running use aveq" |
626 | "\n\n"); | 966 | "\n\n"); |
627 | */ | 967 | */ |
628 | 968 | ||
629 | preempt_disable(); | 969 | disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); |
630 | disk_round_stats(gp); | 970 | while ((hd = disk_part_iter_next(&piter))) { |
631 | preempt_enable(); | 971 | cpu = part_stat_lock(); |
632 | seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", | 972 | part_round_stats(cpu, hd); |
633 | gp->major, n + gp->first_minor, disk_name(gp, n, buf), | 973 | part_stat_unlock(); |
634 | disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), | 974 | seq_printf(seqf, "%4d %7d %s %lu %lu %llu " |
635 | (unsigned long long)disk_stat_read(gp, sectors[0]), | ||
636 | jiffies_to_msecs(disk_stat_read(gp, ticks[0])), | ||
637 | disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), | ||
638 | (unsigned long long)disk_stat_read(gp, sectors[1]), | ||
639 | jiffies_to_msecs(disk_stat_read(gp, ticks[1])), | ||
640 | gp->in_flight, | ||
641 | jiffies_to_msecs(disk_stat_read(gp, io_ticks)), | ||
642 | jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); | ||
643 | |||
644 | /* now show all non-0 size partitions of it */ | ||
645 | for (n = 0; n < gp->minors - 1; n++) { | ||
646 | struct hd_struct *hd = gp->part[n]; | ||
647 | |||
648 | if (!hd || !hd->nr_sects) | ||
649 | continue; | ||
650 | |||
651 | preempt_disable(); | ||
652 | part_round_stats(hd); | ||
653 | preempt_enable(); | ||
654 | seq_printf(s, "%4d %4d %s %lu %lu %llu " | ||
655 | "%u %lu %lu %llu %u %u %u %u\n", | 975 | "%u %lu %lu %llu %u %u %u %u\n", |
656 | gp->major, n + gp->first_minor + 1, | 976 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), |
657 | disk_name(gp, n + 1, buf), | 977 | disk_name(gp, hd->partno, buf), |
658 | part_stat_read(hd, ios[0]), | 978 | part_stat_read(hd, ios[0]), |
659 | part_stat_read(hd, merges[0]), | 979 | part_stat_read(hd, merges[0]), |
660 | (unsigned long long)part_stat_read(hd, sectors[0]), | 980 | (unsigned long long)part_stat_read(hd, sectors[0]), |
@@ -668,14 +988,15 @@ static int diskstats_show(struct seq_file *s, void *v) | |||
668 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) | 988 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) |
669 | ); | 989 | ); |
670 | } | 990 | } |
991 | disk_part_iter_exit(&piter); | ||
671 | 992 | ||
672 | return 0; | 993 | return 0; |
673 | } | 994 | } |
674 | 995 | ||
675 | const struct seq_operations diskstats_op = { | 996 | const struct seq_operations diskstats_op = { |
676 | .start = diskstats_start, | 997 | .start = disk_seqf_start, |
677 | .next = diskstats_next, | 998 | .next = disk_seqf_next, |
678 | .stop = diskstats_stop, | 999 | .stop = disk_seqf_stop, |
679 | .show = diskstats_show | 1000 | .show = diskstats_show |
680 | }; | 1001 | }; |
681 | #endif /* CONFIG_PROC_FS */ | 1002 | #endif /* CONFIG_PROC_FS */ |
@@ -690,7 +1011,7 @@ static void media_change_notify_thread(struct work_struct *work) | |||
690 | * set environment vars to indicate which event this is for | 1011 | * set environment vars to indicate which event this is for |
691 | * so that user space will know to go check the media status. | 1012 | * so that user space will know to go check the media status. |
692 | */ | 1013 | */ |
693 | kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); | 1014 | kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); |
694 | put_device(gd->driverfs_dev); | 1015 | put_device(gd->driverfs_dev); |
695 | } | 1016 | } |
696 | 1017 | ||
@@ -703,42 +1024,29 @@ void genhd_media_change_notify(struct gendisk *disk) | |||
703 | EXPORT_SYMBOL_GPL(genhd_media_change_notify); | 1024 | EXPORT_SYMBOL_GPL(genhd_media_change_notify); |
704 | #endif /* 0 */ | 1025 | #endif /* 0 */ |
705 | 1026 | ||
706 | struct find_block { | 1027 | dev_t blk_lookup_devt(const char *name, int partno) |
707 | const char *name; | ||
708 | int part; | ||
709 | }; | ||
710 | |||
711 | static int match_id(struct device *dev, void *data) | ||
712 | { | 1028 | { |
713 | struct find_block *find = data; | 1029 | dev_t devt = MKDEV(0, 0); |
1030 | struct class_dev_iter iter; | ||
1031 | struct device *dev; | ||
714 | 1032 | ||
715 | if (dev->type != &disk_type) | 1033 | class_dev_iter_init(&iter, &block_class, NULL, &disk_type); |
716 | return 0; | 1034 | while ((dev = class_dev_iter_next(&iter))) { |
717 | if (strcmp(dev->bus_id, find->name) == 0) { | ||
718 | struct gendisk *disk = dev_to_disk(dev); | 1035 | struct gendisk *disk = dev_to_disk(dev); |
719 | if (find->part < disk->minors) | 1036 | struct hd_struct *part; |
720 | return 1; | ||
721 | } | ||
722 | return 0; | ||
723 | } | ||
724 | 1037 | ||
725 | dev_t blk_lookup_devt(const char *name, int part) | 1038 | if (strcmp(dev->bus_id, name)) |
726 | { | 1039 | continue; |
727 | struct device *dev; | ||
728 | dev_t devt = MKDEV(0, 0); | ||
729 | struct find_block find; | ||
730 | 1040 | ||
731 | mutex_lock(&block_class_lock); | 1041 | part = disk_get_part(disk, partno); |
732 | find.name = name; | 1042 | if (part) { |
733 | find.part = part; | 1043 | devt = part_devt(part); |
734 | dev = class_find_device(&block_class, NULL, &find, match_id); | 1044 | disk_put_part(part); |
735 | if (dev) { | 1045 | break; |
736 | put_device(dev); | 1046 | } |
737 | devt = MKDEV(MAJOR(dev->devt), | 1047 | disk_put_part(part); |
738 | MINOR(dev->devt) + part); | ||
739 | } | 1048 | } |
740 | mutex_unlock(&block_class_lock); | 1049 | class_dev_iter_exit(&iter); |
741 | |||
742 | return devt; | 1050 | return devt; |
743 | } | 1051 | } |
744 | EXPORT_SYMBOL(blk_lookup_devt); | 1052 | EXPORT_SYMBOL(blk_lookup_devt); |
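blk_lookup_devt() exists mainly for early-boot name resolution (name_to_dev_t() handling root=), turning a disk name plus partition number into a dev_t. A short in-kernel usage sketch (the caller and the "sda" name are illustrative):

#include <linux/genhd.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>

static void example_lookup(void)
{
        dev_t devt = blk_lookup_devt("sda", 2);

        if (!devt)  /* MKDEV(0, 0) means not (yet) registered */
                printk(KERN_INFO "sda2 not found\n");
        else
                printk(KERN_INFO "sda2 = %u:%u\n", MAJOR(devt), MINOR(devt));
}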
@@ -747,6 +1055,7 @@ struct gendisk *alloc_disk(int minors) | |||
747 | { | 1055 | { |
748 | return alloc_disk_node(minors, -1); | 1056 | return alloc_disk_node(minors, -1); |
749 | } | 1057 | } |
1058 | EXPORT_SYMBOL(alloc_disk); | ||
750 | 1059 | ||
751 | struct gendisk *alloc_disk_node(int minors, int node_id) | 1060 | struct gendisk *alloc_disk_node(int minors, int node_id) |
752 | { | 1061 | { |
@@ -755,32 +1064,28 @@ struct gendisk *alloc_disk_node(int minors, int node_id) | |||
755 | disk = kmalloc_node(sizeof(struct gendisk), | 1064 | disk = kmalloc_node(sizeof(struct gendisk), |
756 | GFP_KERNEL | __GFP_ZERO, node_id); | 1065 | GFP_KERNEL | __GFP_ZERO, node_id); |
757 | if (disk) { | 1066 | if (disk) { |
758 | if (!init_disk_stats(disk)) { | 1067 | if (!init_part_stats(&disk->part0)) { |
759 | kfree(disk); | 1068 | kfree(disk); |
760 | return NULL; | 1069 | return NULL; |
761 | } | 1070 | } |
762 | if (minors > 1) { | 1071 | if (disk_expand_part_tbl(disk, 0)) { |
763 | int size = (minors - 1) * sizeof(struct hd_struct *); | 1072 | free_part_stats(&disk->part0); |
764 | disk->part = kmalloc_node(size, | 1073 | kfree(disk); |
765 | GFP_KERNEL | __GFP_ZERO, node_id); | 1074 | return NULL; |
766 | if (!disk->part) { | ||
767 | free_disk_stats(disk); | ||
768 | kfree(disk); | ||
769 | return NULL; | ||
770 | } | ||
771 | } | 1075 | } |
1076 | disk->part_tbl->part[0] = &disk->part0; | ||
1077 | |||
772 | disk->minors = minors; | 1078 | disk->minors = minors; |
773 | rand_initialize_disk(disk); | 1079 | rand_initialize_disk(disk); |
774 | disk->dev.class = &block_class; | 1080 | disk_to_dev(disk)->class = &block_class; |
775 | disk->dev.type = &disk_type; | 1081 | disk_to_dev(disk)->type = &disk_type; |
776 | device_initialize(&disk->dev); | 1082 | device_initialize(disk_to_dev(disk)); |
777 | INIT_WORK(&disk->async_notify, | 1083 | INIT_WORK(&disk->async_notify, |
778 | media_change_notify_thread); | 1084 | media_change_notify_thread); |
1085 | disk->node_id = node_id; | ||
779 | } | 1086 | } |
780 | return disk; | 1087 | return disk; |
781 | } | 1088 | } |
782 | |||
783 | EXPORT_SYMBOL(alloc_disk); | ||
784 | EXPORT_SYMBOL(alloc_disk_node); | 1089 | EXPORT_SYMBOL(alloc_disk_node); |
785 | 1090 | ||
786 | struct kobject *get_disk(struct gendisk *disk) | 1091 | struct kobject *get_disk(struct gendisk *disk) |
@@ -793,7 +1098,7 @@ struct kobject *get_disk(struct gendisk *disk) | |||
793 | owner = disk->fops->owner; | 1098 | owner = disk->fops->owner; |
794 | if (owner && !try_module_get(owner)) | 1099 | if (owner && !try_module_get(owner)) |
795 | return NULL; | 1100 | return NULL; |
796 | kobj = kobject_get(&disk->dev.kobj); | 1101 | kobj = kobject_get(&disk_to_dev(disk)->kobj); |
797 | if (kobj == NULL) { | 1102 | if (kobj == NULL) { |
798 | module_put(owner); | 1103 | module_put(owner); |
799 | return NULL; | 1104 | return NULL; |
@@ -807,27 +1112,28 @@ EXPORT_SYMBOL(get_disk); | |||
807 | void put_disk(struct gendisk *disk) | 1112 | void put_disk(struct gendisk *disk) |
808 | { | 1113 | { |
809 | if (disk) | 1114 | if (disk) |
810 | kobject_put(&disk->dev.kobj); | 1115 | kobject_put(&disk_to_dev(disk)->kobj); |
811 | } | 1116 | } |
812 | 1117 | ||
813 | EXPORT_SYMBOL(put_disk); | 1118 | EXPORT_SYMBOL(put_disk); |
814 | 1119 | ||
815 | void set_device_ro(struct block_device *bdev, int flag) | 1120 | void set_device_ro(struct block_device *bdev, int flag) |
816 | { | 1121 | { |
817 | if (bdev->bd_contains != bdev) | 1122 | bdev->bd_part->policy = flag; |
818 | bdev->bd_part->policy = flag; | ||
819 | else | ||
820 | bdev->bd_disk->policy = flag; | ||
821 | } | 1123 | } |
822 | 1124 | ||
823 | EXPORT_SYMBOL(set_device_ro); | 1125 | EXPORT_SYMBOL(set_device_ro); |
824 | 1126 | ||
825 | void set_disk_ro(struct gendisk *disk, int flag) | 1127 | void set_disk_ro(struct gendisk *disk, int flag) |
826 | { | 1128 | { |
827 | int i; | 1129 | struct disk_part_iter piter; |
828 | disk->policy = flag; | 1130 | struct hd_struct *part; |
829 | for (i = 0; i < disk->minors - 1; i++) | 1131 | |
830 | if (disk->part[i]) disk->part[i]->policy = flag; | 1132 | disk_part_iter_init(&piter, disk, |
1133 | DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); | ||
1134 | while ((part = disk_part_iter_next(&piter))) | ||
1135 | part->policy = flag; | ||
1136 | disk_part_iter_exit(&piter); | ||
831 | } | 1137 | } |
832 | 1138 | ||
833 | EXPORT_SYMBOL(set_disk_ro); | 1139 | EXPORT_SYMBOL(set_disk_ro); |
@@ -836,18 +1142,15 @@ int bdev_read_only(struct block_device *bdev) | |||
836 | { | 1142 | { |
837 | if (!bdev) | 1143 | if (!bdev) |
838 | return 0; | 1144 | return 0; |
839 | else if (bdev->bd_contains != bdev) | 1145 | return bdev->bd_part->policy; |
840 | return bdev->bd_part->policy; | ||
841 | else | ||
842 | return bdev->bd_disk->policy; | ||
843 | } | 1146 | } |
844 | 1147 | ||
845 | EXPORT_SYMBOL(bdev_read_only); | 1148 | EXPORT_SYMBOL(bdev_read_only); |
846 | 1149 | ||
847 | int invalidate_partition(struct gendisk *disk, int index) | 1150 | int invalidate_partition(struct gendisk *disk, int partno) |
848 | { | 1151 | { |
849 | int res = 0; | 1152 | int res = 0; |
850 | struct block_device *bdev = bdget_disk(disk, index); | 1153 | struct block_device *bdev = bdget_disk(disk, partno); |
851 | if (bdev) { | 1154 | if (bdev) { |
852 | fsync_bdev(bdev); | 1155 | fsync_bdev(bdev); |
853 | res = __invalidate_device(bdev); | 1156 | res = __invalidate_device(bdev); |
diff --git a/block/ioctl.c b/block/ioctl.c index 77185e5c026a..38bee321e1fa 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
12 | { | 12 | { |
13 | struct block_device *bdevp; | 13 | struct block_device *bdevp; |
14 | struct gendisk *disk; | 14 | struct gendisk *disk; |
15 | struct hd_struct *part; | ||
15 | struct blkpg_ioctl_arg a; | 16 | struct blkpg_ioctl_arg a; |
16 | struct blkpg_partition p; | 17 | struct blkpg_partition p; |
18 | struct disk_part_iter piter; | ||
17 | long long start, length; | 19 | long long start, length; |
18 | int part; | 20 | int partno; |
19 | int i; | ||
20 | int err; | 21 | int err; |
21 | 22 | ||
22 | if (!capable(CAP_SYS_ADMIN)) | 23 | if (!capable(CAP_SYS_ADMIN)) |
@@ -28,8 +29,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
28 | disk = bdev->bd_disk; | 29 | disk = bdev->bd_disk; |
29 | if (bdev != bdev->bd_contains) | 30 | if (bdev != bdev->bd_contains) |
30 | return -EINVAL; | 31 | return -EINVAL; |
31 | part = p.pno; | 32 | partno = p.pno; |
32 | if (part <= 0 || part >= disk->minors) | 33 | if (partno <= 0) |
33 | return -EINVAL; | 34 | return -EINVAL; |
34 | switch (a.op) { | 35 | switch (a.op) { |
35 | case BLKPG_ADD_PARTITION: | 36 | case BLKPG_ADD_PARTITION: |
@@ -43,36 +44,37 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
43 | || pstart < 0 || plength < 0) | 44 | || pstart < 0 || plength < 0) |
44 | return -EINVAL; | 45 | return -EINVAL; |
45 | } | 46 | } |
46 | /* partition number in use? */ | 47 | |
47 | mutex_lock(&bdev->bd_mutex); | 48 | mutex_lock(&bdev->bd_mutex); |
48 | if (disk->part[part - 1]) { | ||
49 | mutex_unlock(&bdev->bd_mutex); | ||
50 | return -EBUSY; | ||
51 | } | ||
52 | /* overlap? */ | ||
53 | for (i = 0; i < disk->minors - 1; i++) { | ||
54 | struct hd_struct *s = disk->part[i]; | ||
55 | 49 | ||
56 | if (!s) | 50 | /* overlap? */ |
57 | continue; | 51 | disk_part_iter_init(&piter, disk, |
58 | if (!(start+length <= s->start_sect || | 52 | DISK_PITER_INCL_EMPTY); |
59 | start >= s->start_sect + s->nr_sects)) { | 53 | while ((part = disk_part_iter_next(&piter))) { |
54 | if (!(start + length <= part->start_sect || | ||
55 | start >= part->start_sect + part->nr_sects)) { | ||
56 | disk_part_iter_exit(&piter); | ||
60 | mutex_unlock(&bdev->bd_mutex); | 57 | mutex_unlock(&bdev->bd_mutex); |
61 | return -EBUSY; | 58 | return -EBUSY; |
62 | } | 59 | } |
63 | } | 60 | } |
61 | disk_part_iter_exit(&piter); | ||
62 | |||
64 | /* all seems OK */ | 63 | /* all seems OK */ |
65 | err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE); | 64 | err = add_partition(disk, partno, start, length, |
65 | ADDPART_FLAG_NONE); | ||
66 | mutex_unlock(&bdev->bd_mutex); | 66 | mutex_unlock(&bdev->bd_mutex); |
67 | return err; | 67 | return err; |
68 | case BLKPG_DEL_PARTITION: | 68 | case BLKPG_DEL_PARTITION: |
69 | if (!disk->part[part-1]) | 69 | part = disk_get_part(disk, partno); |
70 | return -ENXIO; | 70 | if (!part) |
71 | if (disk->part[part - 1]->nr_sects == 0) | ||
72 | return -ENXIO; | 71 | return -ENXIO; |
73 | bdevp = bdget_disk(disk, part); | 72 | |
73 | bdevp = bdget(part_devt(part)); | ||
74 | disk_put_part(part); | ||
74 | if (!bdevp) | 75 | if (!bdevp) |
75 | return -ENOMEM; | 76 | return -ENOMEM; |
77 | |||
76 | mutex_lock(&bdevp->bd_mutex); | 78 | mutex_lock(&bdevp->bd_mutex); |
77 | if (bdevp->bd_openers) { | 79 | if (bdevp->bd_openers) { |
78 | mutex_unlock(&bdevp->bd_mutex); | 80 | mutex_unlock(&bdevp->bd_mutex); |
@@ -84,7 +86,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user | |||
84 | invalidate_bdev(bdevp); | 86 | invalidate_bdev(bdevp); |
85 | 87 | ||
86 | mutex_lock_nested(&bdev->bd_mutex, 1); | 88 | mutex_lock_nested(&bdev->bd_mutex, 1); |
87 | delete_partition(disk, part); | 89 | delete_partition(disk, partno); |
88 | mutex_unlock(&bdev->bd_mutex); | 90 | mutex_unlock(&bdev->bd_mutex); |
89 | mutex_unlock(&bdevp->bd_mutex); | 91 | mutex_unlock(&bdevp->bd_mutex); |
90 | bdput(bdevp); | 92 | bdput(bdevp); |
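From userspace, blkpg_ioctl() above is driven through the BLKPG ioctl with a struct blkpg_ioctl_arg wrapping a struct blkpg_partition. A hedged sketch of deleting a partition that way (the device path is an example only):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blkpg.h>

int main(void)
{
        struct blkpg_partition part;
        struct blkpg_ioctl_arg arg;
        int fd = open("/dev/sdX", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(&part, 0, sizeof(part));
        part.pno = 2;                   /* partition number to delete */

        memset(&arg, 0, sizeof(arg));
        arg.op = BLKPG_DEL_PARTITION;
        arg.datalen = sizeof(part);
        arg.data = &part;

        if (ioctl(fd, BLKPG, &arg) < 0)
                perror("BLKPG_DEL_PARTITION");
        close(fd);
        return 0;
}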
@@ -100,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev) | |||
100 | struct gendisk *disk = bdev->bd_disk; | 102 | struct gendisk *disk = bdev->bd_disk; |
101 | int res; | 103 | int res; |
102 | 104 | ||
103 | if (disk->minors == 1 || bdev != bdev->bd_contains) | 105 | if (!disk_partitionable(disk) || bdev != bdev->bd_contains) |
104 | return -EINVAL; | 106 | return -EINVAL; |
105 | if (!capable(CAP_SYS_ADMIN)) | 107 | if (!capable(CAP_SYS_ADMIN)) |
106 | return -EACCES; | 108 | return -EACCES; |
@@ -111,6 +113,69 @@ static int blkdev_reread_part(struct block_device *bdev) | |||
111 | return res; | 113 | return res; |
112 | } | 114 | } |
113 | 115 | ||
116 | static void blk_ioc_discard_endio(struct bio *bio, int err) | ||
117 | { | ||
118 | if (err) { | ||
119 | if (err == -EOPNOTSUPP) | ||
120 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
121 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
122 | } | ||
123 | complete(bio->bi_private); | ||
124 | } | ||
125 | |||
126 | static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | ||
127 | uint64_t len) | ||
128 | { | ||
129 | struct request_queue *q = bdev_get_queue(bdev); | ||
130 | int ret = 0; | ||
131 | |||
132 | if (start & 511) | ||
133 | return -EINVAL; | ||
134 | if (len & 511) | ||
135 | return -EINVAL; | ||
136 | start >>= 9; | ||
137 | len >>= 9; | ||
138 | |||
139 | if (start + len > (bdev->bd_inode->i_size >> 9)) | ||
140 | return -EINVAL; | ||
141 | |||
142 | if (!q->prepare_discard_fn) | ||
143 | return -EOPNOTSUPP; | ||
144 | |||
145 | while (len && !ret) { | ||
146 | DECLARE_COMPLETION_ONSTACK(wait); | ||
147 | struct bio *bio; | ||
148 | |||
149 | bio = bio_alloc(GFP_KERNEL, 0); | ||
150 | if (!bio) | ||
151 | return -ENOMEM; | ||
152 | |||
153 | bio->bi_end_io = blk_ioc_discard_endio; | ||
154 | bio->bi_bdev = bdev; | ||
155 | bio->bi_private = &wait; | ||
156 | bio->bi_sector = start; | ||
157 | |||
158 | if (len > q->max_hw_sectors) { | ||
159 | bio->bi_size = q->max_hw_sectors << 9; | ||
160 | len -= q->max_hw_sectors; | ||
161 | start += q->max_hw_sectors; | ||
162 | } else { | ||
163 | bio->bi_size = len << 9; | ||
164 | len = 0; | ||
165 | } | ||
166 | submit_bio(DISCARD_NOBARRIER, bio); | ||
167 | |||
168 | wait_for_completion(&wait); | ||
169 | |||
170 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | ||
171 | ret = -EOPNOTSUPP; | ||
172 | else if (!bio_flagged(bio, BIO_UPTODATE)) | ||
173 | ret = -EIO; | ||
174 | bio_put(bio); | ||
175 | } | ||
176 | return ret; | ||
177 | } | ||
178 | |||
114 | static int put_ushort(unsigned long arg, unsigned short val) | 179 | static int put_ushort(unsigned long arg, unsigned short val) |
115 | { | 180 | { |
116 | return put_user(val, (unsigned short __user *)arg); | 181 | return put_user(val, (unsigned short __user *)arg); |
@@ -258,6 +323,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, | |||
258 | set_device_ro(bdev, n); | 323 | set_device_ro(bdev, n); |
259 | unlock_kernel(); | 324 | unlock_kernel(); |
260 | return 0; | 325 | return 0; |
326 | |||
327 | case BLKDISCARD: { | ||
328 | uint64_t range[2]; | ||
329 | |||
330 | if (!(file->f_mode & FMODE_WRITE)) | ||
331 | return -EBADF; | ||
332 | |||
333 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) | ||
334 | return -EFAULT; | ||
335 | |||
336 | return blk_ioctl_discard(bdev, range[0], range[1]); | ||
337 | } | ||
338 | |||
261 | case HDIO_GETGEO: { | 339 | case HDIO_GETGEO: { |
262 | struct hd_geometry geo; | 340 | struct hd_geometry geo; |
263 | 341 | ||
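The new BLKDISCARD command passes a {start, length} pair in bytes, both 512-byte aligned, exactly as blk_ioctl_discard() checks, and requires the descriptor to be open for writing. A userspace sketch (the device path is an example; the fallback define mirrors the value this series adds to <linux/fs.h>):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

#ifndef BLKDISCARD
#define BLKDISCARD _IO(0x12, 119)
#endif

int main(void)
{
        uint64_t range[2] = { 0, 1024 * 1024 };  /* start, length in bytes */
        int fd = open("/dev/sdX", O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BLKDISCARD, &range) < 0)
                perror("BLKDISCARD");  /* EOPNOTSUPP without prepare_discard_fn */
        close(fd);
        return 0;
}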
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index ec4b7f234626..c34272a348fe 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -185,6 +185,7 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) | |||
185 | __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok); | 185 | __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok); |
186 | __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok); | 186 | __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok); |
187 | __set_bit(GPCMD_SET_STREAMING, filter->write_ok); | 187 | __set_bit(GPCMD_SET_STREAMING, filter->write_ok); |
188 | __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); | ||
188 | } | 189 | } |
189 | EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); | 190 | EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); |
190 | 191 | ||
@@ -313,11 +314,12 @@ static int sg_io(struct file *file, struct request_queue *q, | |||
313 | goto out; | 314 | goto out; |
314 | } | 315 | } |
315 | 316 | ||
316 | ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, | 317 | ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, |
317 | hdr->dxfer_len); | 318 | hdr->dxfer_len, GFP_KERNEL); |
318 | kfree(iov); | 319 | kfree(iov); |
319 | } else if (hdr->dxfer_len) | 320 | } else if (hdr->dxfer_len) |
320 | ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); | 321 | ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, |
322 | GFP_KERNEL); | ||
321 | 323 | ||
322 | if (ret) | 324 | if (ret) |
323 | goto out; | 325 | goto out; |