author		Linus Torvalds <torvalds@linux-foundation.org>	2008-10-10 13:52:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-10-10 13:52:45 -0400
commit		e26feff647ef34423b048b940540a0059001ddb0 (patch)
tree		acafe68602ee2f6f1a438c113073ffcc0040e949
parent		d403a6484f0341bf0624d17ece46f24f741b6a92 (diff)
parent		b911e473d24633c19414b54b82b9ff0b1a2419d7 (diff)
Merge branch 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.28' of git://git.kernel.dk/linux-2.6-block: (132 commits)
  doc/cdrom: Trvial documentation error, file not present
  block_dev: fix kernel-doc in new functions
  block: add some comments around the bio read-write flags
  block: mark bio_split_pool static
  block: Find bio sector offset given idx and offset
  block: gendisk integrity wrapper
  block: Switch blk_integrity_compare from bdev to gendisk
  block: Fix double put in blk_integrity_unregister
  block: Introduce integrity data ownership flag
  block: revert part of d7533ad0e132f92e75c1b2eb7c26387b25a583c1
  bio.h: Remove unused conditional code
  block: remove end_{queued|dequeued}_request()
  block: change elevator to use __blk_end_request()
  gdrom: change to use __blk_end_request()
  memstick: change to use __blk_end_request()
  virtio_blk: change to use __blk_end_request()
  blktrace: use BLKTRACE_BDEV_SIZE as the name size for setup structure
  block: add lld busy state exporting interface
  block: Fix blk_start_queueing() to not kick a stopped queue
  include blktrace_api.h in headers_install
  ...
-rw-r--r--	Documentation/DMA-API.txt			2
-rw-r--r--	Documentation/DocBook/kernel-api.tmpl		4
-rw-r--r--	Documentation/block/deadline-iosched.txt	14
-rw-r--r--	Documentation/cdrom/ide-cd			3
-rw-r--r--	block/Makefile					4
-rw-r--r--	block/as-iosched.c				14
-rw-r--r--	block/blk-barrier.c				72
-rw-r--r--	block/blk-core.c				605
-rw-r--r--	block/blk-exec.c				6
-rw-r--r--	block/blk-integrity.c				33
-rw-r--r--	block/blk-map.c					68
-rw-r--r--	block/blk-merge.c				129
-rw-r--r--	block/blk-settings.c				43
-rw-r--r--	block/blk-softirq.c				175
-rw-r--r--	block/blk-sysfs.c				35
-rw-r--r--	block/blk-tag.c					22
-rw-r--r--	block/blk-timeout.c				238
-rw-r--r--	block/blk.h					48
-rw-r--r--	block/blktrace.c				32
-rw-r--r--	block/bsg.c					6
-rw-r--r--	block/cfq-iosched.c				57
-rw-r--r--	block/cmd-filter.c				9
-rw-r--r--	block/compat_ioctl.c				1
-rw-r--r--	block/deadline-iosched.c			40
-rw-r--r--	block/elevator.c				40
-rw-r--r--	block/genhd.c					965
-rw-r--r--	block/ioctl.c					124
-rw-r--r--	block/scsi_ioctl.c				8
-rw-r--r--	drivers/ata/libata-eh.c				13
-rw-r--r--	drivers/ata/libata-scsi.c			4
-rw-r--r--	drivers/ata/libata.h				2
-rw-r--r--	drivers/base/base.h				2
-rw-r--r--	drivers/base/class.c				136
-rw-r--r--	drivers/base/core.c				6
-rw-r--r--	drivers/block/aoe/aoeblk.c			6
-rw-r--r--	drivers/block/aoe/aoecmd.c			19
-rw-r--r--	drivers/block/aoe/aoedev.c			2
-rw-r--r--	drivers/block/cciss.c				8
-rw-r--r--	drivers/block/cciss_scsi.c			151
-rw-r--r--	drivers/block/cciss_scsi.h			4
-rw-r--r--	drivers/block/cpqarray.c			2
-rw-r--r--	drivers/block/floppy.c				31
-rw-r--r--	drivers/block/nbd.c				4
-rw-r--r--	drivers/block/pktcdvd.c				4
-rw-r--r--	drivers/block/ps3disk.c				11
-rw-r--r--	drivers/block/virtio_blk.c			14
-rw-r--r--	drivers/block/xen-blkfront.c			76
-rw-r--r--	drivers/cdrom/cdrom.c				2
-rw-r--r--	drivers/cdrom/gdrom.c				4
-rw-r--r--	drivers/char/random.c				6
-rw-r--r--	drivers/ide/ide-cd.c				2
-rw-r--r--	drivers/ide/ide-disk.c				15
-rw-r--r--	drivers/ide/ide-probe.c				2
-rw-r--r--	drivers/md/dm-ioctl.c				6
-rw-r--r--	drivers/md/dm-mpath.c				15
-rw-r--r--	drivers/md/dm-stripe.c				4
-rw-r--r--	drivers/md/dm.c					40
-rw-r--r--	drivers/md/linear.c				10
-rw-r--r--	drivers/md/md.c					15
-rw-r--r--	drivers/md/multipath.c				8
-rw-r--r--	drivers/md/raid0.c				10
-rw-r--r--	drivers/md/raid1.c				13
-rw-r--r--	drivers/md/raid10.c				14
-rw-r--r--	drivers/md/raid5.c				75
-rw-r--r--	drivers/memstick/core/mspro_block.c		4
-rw-r--r--	drivers/mmc/card/block.c			2
-rw-r--r--	drivers/mtd/ftl.c				24
-rw-r--r--	drivers/mtd/mtd_blkdevs.c			16
-rw-r--r--	drivers/s390/block/dasd_proc.c			3
-rw-r--r--	drivers/s390/block/dcssblk.c			4
-rw-r--r--	drivers/scsi/aacraid/aachba.c			2
-rw-r--r--	drivers/scsi/gdth.c				60
-rw-r--r--	drivers/scsi/gdth.h				2
-rw-r--r--	drivers/scsi/gdth_proc.c			66
-rw-r--r--	drivers/scsi/gdth_proc.h			3
-rw-r--r--	drivers/scsi/ibmvscsi/ibmvscsi.c		2
-rw-r--r--	drivers/scsi/ide-scsi.c				2
-rw-r--r--	drivers/scsi/ipr.c				3
-rw-r--r--	drivers/scsi/ips.c				2
-rw-r--r--	drivers/scsi/libiscsi.c				17
-rw-r--r--	drivers/scsi/libsas/sas_ata.c			2
-rw-r--r--	drivers/scsi/libsas/sas_internal.h		2
-rw-r--r--	drivers/scsi/libsas/sas_scsi_host.c		30
-rw-r--r--	drivers/scsi/megaraid/megaraid_sas.c		6
-rw-r--r--	drivers/scsi/ncr53c8xx.c			4
-rw-r--r--	drivers/scsi/qla1280.c				4
-rw-r--r--	drivers/scsi/qla4xxx/ql4_os.c			4
-rw-r--r--	drivers/scsi/scsi.c				92
-rw-r--r--	drivers/scsi/scsi_error.c			90
-rw-r--r--	drivers/scsi/scsi_lib.c				17
-rw-r--r--	drivers/scsi/scsi_priv.h			7
-rw-r--r--	drivers/scsi/scsi_sysfs.c			7
-rw-r--r--	drivers/scsi/scsi_tgt_lib.c			2
-rw-r--r--	drivers/scsi/scsi_transport_fc.c		6
-rw-r--r--	drivers/scsi/sd.c				95
-rw-r--r--	drivers/scsi/sg.c				667
-rw-r--r--	drivers/scsi/sr.c				7
-rw-r--r--	drivers/scsi/sym53c8xx_2/sym_glue.c		4
-rw-r--r--	fs/bio-integrity.c				29
-rw-r--r--	fs/bio.c					297
-rw-r--r--	fs/block_dev.c					182
-rw-r--r--	fs/fat/fatent.c					14
-rw-r--r--	fs/partitions/check.c				268
-rw-r--r--	fs/partitions/check.h				4
-rw-r--r--	include/linux/Kbuild				1
-rw-r--r--	include/linux/ata.h				6
-rw-r--r--	include/linux/bio.h				108
-rw-r--r--	include/linux/blkdev.h				151
-rw-r--r--	include/linux/blktrace_api.h			62
-rw-r--r--	include/linux/device.h				14
-rw-r--r--	include/linux/elevator.h			9
-rw-r--r--	include/linux/fd.h				8
-rw-r--r--	include/linux/fs.h				9
-rw-r--r--	include/linux/genhd.h				363
-rw-r--r--	include/linux/klist.h				3
-rw-r--r--	include/linux/major.h				2
-rw-r--r--	include/linux/mtd/blktrans.h			2
-rw-r--r--	include/scsi/scsi_cmnd.h			3
-rw-r--r--	include/scsi/scsi_host.h			9
-rw-r--r--	include/scsi/scsi_transport.h			3
-rw-r--r--	init/do_mounts.c				4
-rw-r--r--	lib/Kconfig.debug				35
-rw-r--r--	lib/klist.c					96
-rw-r--r--	mm/bounce.c					2
124 files changed, 3838 insertions, 2637 deletions
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index d8b63d164e41..b8e86460046e 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this:
 	int i, count = dma_map_sg(dev, sglist, nents, direction);
 	struct scatterlist *sg;
 
-	for (i = 0, sg = sglist; i < count; i++, sg++) {
+	for_each_sg(sglist, sg, count, i) {
 		hw_address[i] = sg_dma_address(sg);
 		hw_len[i] = sg_dma_len(sg);
 	}
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index b7b1482f6e04..f5696ba9ae96 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c
 !Eblock/blk-barrier.c
 !Eblock/blk-tag.c
 !Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Iblock/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
   </chapter>
 
   <chapter id="chrdev">
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
index c23cab13c3d1..72576769e0f4 100644
--- a/Documentation/block/deadline-iosched.txt
+++ b/Documentation/block/deadline-iosched.txt
@@ -30,12 +30,18 @@ write_expire (in ms)
 Similar to read_expire mentioned above, but for writes.
 
 
-fifo_batch
+fifo_batch	(number of requests)
 ----------
 
-When a read request expires its deadline, we must move some requests from
-the sorted io scheduler list to the block device dispatch queue. fifo_batch
-controls how many requests we move.
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order.  To limit extra seeking,
+deadline expiries are only checked between batches.  fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput.  When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour).  Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
 
 
 writes_starved	(number of dispatches)
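
[Editor's note: the deadline tunables above are exposed per device under sysfs. A minimal, hedged sketch of adjusting fifo_batch from userspace — assuming /dev/sda currently uses the deadline scheduler; the attribute path is the standard iosched directory, while the program itself is purely illustrative:]

	#include <stdio.h>

	int main(void)
	{
		/* Standard sysfs attribute for the deadline scheduler; it only
		 * exists while deadline is the active scheduler for sda. */
		const char *attr = "/sys/block/sda/queue/iosched/fifo_batch";
		FILE *f = fopen(attr, "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "1\n");	/* 1 == first-come first-served behaviour */
		return fclose(f) ? 1 : 0;
	}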
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
index 91c0dcc6fa5c..2c558cd6c1ef 100644
--- a/Documentation/cdrom/ide-cd
+++ b/Documentation/cdrom/ide-cd
@@ -145,8 +145,7 @@ useful for reading photocds.
 
 To play an audio CD, you should first unmount and remove any data
 CDROM.  Any of the CDROM player programs should then work (workman,
-workbone, cdplayer, etc.).  Lacking anything else, you could use the
-cdtester program in Documentation/cdrom/sbpcd.
+workbone, cdplayer, etc.).
 
 On a few drives, you can read digital audio directly using a program
 such as cdda2wav.  The only types of drive which I've heard support
diff --git a/block/Makefile b/block/Makefile
index 208000b0750d..bfe73049f939 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,8 +4,8 @@
 
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-			blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
-			cmd-filter.o
+			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cf4eb0eefbbf..71f0abb219ee 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
 		del_timer(&ad->antic_timer);
 		ad->antic_status = ANTIC_FINISHED;
 		/* see as_work_handler */
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(ad->q, &ad->antic_work);
 	}
 }
 
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data)
 		aic = ad->io_context->aic;
 
 		ad->antic_status = ANTIC_FINISHED;
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 
 		if (aic->ttime_samples == 0) {
 			/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
  */
 static int as_can_anticipate(struct as_data *ad, struct request *rq)
 {
+#if 0 /* disable for now, we need to check tag level as well */
+	/*
+	 * SSD device without seek penalty, disable idling
+	 */
+	if (blk_queue_nonrot(ad->q)) axman
+		return 0;
+#endif
+
 	if (!ad->io_context)
 		/*
 		 * Last request submitted was a write
@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		ad->current_batch_expires = jiffies +
 					ad->batch_expire[ad->batch_data_dir];
-		kblockd_schedule_work(&ad->antic_work);
+		kblockd_schedule_work(q, &ad->antic_work);
 		ad->changed_batch = 0;
 
 		if (ad->batch_data_dir == REQ_SYNC)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index a09ead19f9c5..5c99ff8d2db8 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	bio->bi_end_io = bio_end_empty_barrier;
 	bio->bi_private = &wait;
 	bio->bi_bdev = bdev;
-	submit_bio(1 << BIO_RW_BARRIER, bio);
+	submit_bio(WRITE_BARRIER, bio);
 
 	wait_for_completion(&wait);
 
@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev:	blockdev to issue discard for
+ * @sector:	start sector
+ * @nr_sects:	number of sectors to discard
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Issue a discard request for the sectors in question. Does not wait.
+ */
+int blkdev_issue_discard(struct block_device *bdev,
+			 sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+{
+	struct request_queue *q;
+	struct bio *bio;
+	int ret = 0;
+
+	if (bdev->bd_disk == NULL)
+		return -ENXIO;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	if (!q->prepare_discard_fn)
+		return -EOPNOTSUPP;
+
+	while (nr_sects && !ret) {
+		bio = bio_alloc(gfp_mask, 0);
+		if (!bio)
+			return -ENOMEM;
+
+		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_bdev = bdev;
+
+		bio->bi_sector = sector;
+
+		if (nr_sects > q->max_hw_sectors) {
+			bio->bi_size = q->max_hw_sectors << 9;
+			nr_sects -= q->max_hw_sectors;
+			sector += q->max_hw_sectors;
+		} else {
+			bio->bi_size = nr_sects << 9;
+			nr_sects = 0;
+		}
+		bio_get(bio);
+		submit_bio(DISCARD_BARRIER, bio);
+
+		/* Check if it failed immediately */
+		if (bio_flagged(bio, BIO_EOPNOTSUPP))
+			ret = -EOPNOTSUPP;
+		else if (!bio_flagged(bio, BIO_UPTODATE))
+			ret = -EIO;
+		bio_put(bio);
+	}
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
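
[Editor's note: for illustration only, a caller of the new interface might look like the sketch below. example_trim_extent() is hypothetical and not part of this patch; the -EOPNOTSUPP fallback mirrors the error convention of blkdev_issue_discard() above.]

	/* Hypothetical caller: discard an extent, treating "device cannot
	 * discard" as a harmless no-op rather than an I/O error. */
	static int example_trim_extent(struct block_device *bdev,
				       sector_t start, sector_t nr_sects)
	{
		int ret = blkdev_issue_discard(bdev, start, nr_sects, GFP_KERNEL);

		if (ret == -EOPNOTSUPP)
			ret = 0;
		return ret;
	}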
diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef97b2b..2d053b584410 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
 
@@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
 static void drive_stat_acct(struct request *rq, int new_io)
 {
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
+	int cpu;
 
 	if (!blk_fs_request(rq) || !rq->rq_disk)
 		return;
 
-	part = get_part(rq->rq_disk, rq->sector);
+	cpu = part_stat_lock();
+	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
 	if (!new_io)
-		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+		part_stat_inc(cpu, part, merges[rw]);
 	else {
-		disk_round_stats(rq->rq_disk);
-		rq->rq_disk->in_flight++;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight++;
-		}
+		part_round_stats(cpu, part);
+		part_inc_in_flight(part);
 	}
+
+	part_stat_unlock();
 }
 
 void blk_queue_congestion_threshold(struct request_queue *q)
@@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -308,7 +306,7 @@ void blk_unplug_timeout(unsigned long data)
 	blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
 				q->rq.count[READ] + q->rq.count[WRITE]);
 
-	kblockd_schedule_work(&q->unplug_work);
+	kblockd_schedule_work(q, &q->unplug_work);
 }
 
 void blk_unplug(struct request_queue *q)
@@ -325,6 +323,21 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -339,18 +352,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(&q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -408,15 +410,8 @@ void __blk_run_queue(struct request_queue *q)
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(&q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -441,6 +436,14 @@ void blk_put_queue(struct request_queue *q)
 
 void blk_cleanup_queue(struct request_queue *q)
 {
+	/*
+	 * We know we have process context here, so we can be a little
+	 * cautious and ensure that pending block actions on this device
+	 * are done before moving on. Going into this function, we should
+	 * not have processes doing IO to this device.
+	 */
+	blk_sync_queue(q);
+
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
 	mutex_unlock(&q->sysfs_lock);
@@ -496,6 +499,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	}
 
 	init_timer(&q->unplug_timer);
+	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+	INIT_LIST_HEAD(&q->timeout_list);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
 
@@ -531,7 +536,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
  * request queue; this lock will be taken also from interrupt context, so irq
  * disabling is needed for it.
  *
- * Function returns a pointer to the initialized request queue, or NULL if
+ * Function returns a pointer to the initialized request queue, or %NULL if
  * it didn't succeed.
  *
  * Note:
@@ -569,7 +574,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
-	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER);
+	q->queue_flags		= (1 << QUEUE_FLAG_CLUSTER |
+				   1 << QUEUE_FLAG_STACKABLE);
 	q->queue_lock		= lock;
 
 	blk_queue_segment_boundary(q, 0xffffffff);
@@ -624,10 +630,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
 
 	blk_rq_init(q, rq);
 
-	/*
-	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
-	 * see bio.h and blkdev.h
-	 */
 	rq->cmd_flags = rw | REQ_ALLOCED;
 
 	if (priv) {
@@ -888,9 +890,11 @@ EXPORT_SYMBOL(blk_get_request);
  */
 void blk_start_queueing(struct request_queue *q)
 {
-	if (!blk_queue_plugged(q))
+	if (!blk_queue_plugged(q)) {
+		if (unlikely(blk_queue_stopped(q)))
+			return;
 		q->request_fn(q);
-	else
+	} else
 		__generic_unplug_device(q);
 }
 EXPORT_SYMBOL(blk_start_queueing);
@@ -907,6 +911,8 @@ EXPORT_SYMBOL(blk_start_queueing);
  */
 void blk_requeue_request(struct request_queue *q, struct request *rq)
 {
+	blk_delete_timer(rq);
+	blk_clear_rq_complete(rq);
 	blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
 
 	if (blk_rq_tagged(rq))
@@ -917,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
 EXPORT_SYMBOL(blk_requeue_request);
 
 /**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
  * @q:		request queue where request should be inserted
  * @rq:		request to be inserted
  * @at_head:	insert request at head or tail of queue
@@ -927,8 +933,8 @@ EXPORT_SYMBOL(blk_requeue_request);
  * Many block devices need to execute commands asynchronously, so they don't
  * block the whole kernel from preemption during request execution. This is
  * accomplished normally by inserting aritficial requests tagged as
- * REQ_SPECIAL in to the corresponding request queue, and letting them be
- * scheduled for actual execution by the request queue.
+ * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ * be scheduled for actual execution by the request queue.
  *
  * We have the option of inserting the head or the tail of the queue.
  * Typically we use the tail for new ioctls and so forth. We use the head
@@ -982,8 +988,22 @@ static inline void add_request(struct request_queue *q, struct request *req)
 	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 }
 
-/*
- * disk_round_stats()	- Round off the performance stats on a struct
+static void part_round_stats_single(int cpu, struct hd_struct *part,
+				    unsigned long now)
+{
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(cpu, part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
+/**
+ * part_round_stats()	- Round off the performance stats on a struct
  * disk_stats.
  *
@@ -997,36 +1017,15 @@ static inline void add_request(struct request_queue *q, struct request *req)
  * /proc/diskstats. This accounts immediately for all queue usage up to
  * the current jiffies and restarts the counters again.
  */
-void disk_round_stats(struct gendisk *disk)
+void part_round_stats(int cpu, struct hd_struct *part)
 {
 	unsigned long now = jiffies;
 
-	if (now == disk->stamp)
-		return;
-
-	if (disk->in_flight) {
-		__disk_stat_add(disk, time_in_queue,
-				disk->in_flight * (now - disk->stamp));
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
-	}
-	disk->stamp = now;
-}
-EXPORT_SYMBOL_GPL(disk_round_stats);
-
-void part_round_stats(struct hd_struct *part)
-{
-	unsigned long now = jiffies;
-
-	if (now == part->stamp)
-		return;
-
-	if (part->in_flight) {
-		__part_stat_add(part, time_in_queue,
-				part->in_flight * (now - part->stamp));
-		__part_stat_add(part, io_ticks, (now - part->stamp));
-	}
-	part->stamp = now;
+	if (part->partno)
+		part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
+	part_round_stats_single(cpu, part, now);
 }
+EXPORT_SYMBOL_GPL(part_round_stats);
 
 /*
  * queue lock must be held
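
[Editor's note: the same lock/map/account/unlock idiom recurs at every statistics site this patch converts. A condensed sketch of the pattern, using only the helpers introduced here — the wrapper function name is made up for illustration:]

	/* Illustrative wrapper around the new per-cpu partition accounting.
	 * part_stat_lock() pins the CPU, disk_map_sector_rcu() resolves the
	 * partition under RCU, and part_stat_unlock() drops both. */
	static void example_account_io(struct gendisk *disk, sector_t sector, int rw)
	{
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = disk_map_sector_rcu(disk, sector);
		part_stat_inc(cpu, part, ios[rw]);
		part_round_stats(cpu, part);
		part_stat_unlock();
	}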
@@ -1070,6 +1069,7 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1081,7 +1081,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
-	if (unlikely(bio_barrier(bio)))
+	if (unlikely(bio_discard(bio))) {
+		req->cmd_flags |= REQ_DISCARD;
+		if (bio_barrier(bio))
+			req->cmd_flags |= REQ_SOFTBARRIER;
+		req->q->prepare_discard_fn(req->q, req);
+	} else if (unlikely(bio_barrier(bio)))
 		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	if (bio_sync(bio))
@@ -1099,7 +1104,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, err;
+	int el_ret, nr_sectors, barrier, discard, err;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1114,7 +1119,14 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	blk_queue_bounce(q, &bio);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		err = -EOPNOTSUPP;
+		goto end_io;
+	}
+
+	discard = bio_discard(bio);
+	if (unlikely(discard) && !q->prepare_discard_fn) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1138,6 +1150,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		req->biotail = bio;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_back_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1165,6 +1179,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 		req->sector = req->hard_sector = bio->bi_sector;
 		req->nr_sectors = req->hard_nr_sectors += nr_sectors;
 		req->ioprio = ioprio_best(req->ioprio, prio);
+		if (!blk_rq_cpu_valid(req))
+			req->cpu = bio->bi_comp_cpu;
 		drive_stat_acct(req, 0);
 		if (!attempt_front_merge(q, req))
 			elv_merged_request(q, req, el_ret);
@@ -1200,13 +1216,15 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 
@@ -1260,8 +1278,9 @@ __setup("fail_make_request=", setup_fail_make_request);
 
 static int should_fail_request(struct bio *bio)
 {
-	if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
-	    (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+	struct hd_struct *part = bio->bi_bdev->bd_part;
+
+	if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
 		return should_fail(&fail_make_request, bio->bi_size);
 
 	return 0;
@@ -1314,7 +1333,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 }
 
 /**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
  * @bio:  The bio describing the location in memory and on the device.
  *
  * generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1428,8 @@ end_io:
 
 	if (bio_check_eod(bio, nr_sectors))
 		goto end_io;
-	if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+	if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+	    (bio_discard(bio) && !q->prepare_discard_fn)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -1471,13 +1491,13 @@ void generic_make_request(struct bio *bio)
 EXPORT_SYMBOL(generic_make_request);
 
 /**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
  * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
  * uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
  *
  */
 void submit_bio(int rw, struct bio *bio)
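
[Editor's note: for context, a typical submit_bio() caller is sketched below. Hedged: 2.6.28-era bio fields, error handling omitted, and the function itself is invented for illustration.]

	/* Illustrative one-page read; the completion callback is supplied by
	 * the caller and runs from bio_endio() context. */
	static void example_read_page(struct block_device *bdev, sector_t sector,
				      struct page *page, bio_end_io_t *done)
	{
		struct bio *bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = bdev;
		bio->bi_sector = sector;
		bio_add_page(bio, page, PAGE_SIZE, 0);
		bio->bi_end_io = done;
		submit_bio(READ, bio);	/* accounted only if bio_has_data() */
	}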
@@ -1490,11 +1510,7 @@ void submit_bio(int rw, struct bio *bio)
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
-	if (!bio_empty_barrier(bio)) {
-
-		BIO_BUG_ON(!bio->bi_size);
-		BIO_BUG_ON(!bio->bi_io_vec);
-
+	if (bio_has_data(bio)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1517,9 +1533,90 @@ void submit_bio(int rw, struct bio *bio)
 EXPORT_SYMBOL(submit_bio);
 
 /**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q:  the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ *    @rq may have been made based on weaker limitations of upper-level queues
+ *    in request stacking drivers, and it may violate the limitation of @q.
+ *    Since the block layer and the underlying device driver trust @rq
+ *    after it is inserted to @q, it should be checked against @q before
+ *    the insertion using this generic function.
+ *
+ *    This function should also be useful for request stacking drivers
+ *    in some cases below, so export this function.
+ *    Request stacking drivers like request-based dm may change the queue
+ *    limits while requests are in the queue (e.g. dm's table swapping).
+ *    Such request stacking drivers should check those requests against
+ *    the new queue limits again when they dispatch those requests,
+ *    although such checkings are also done against the old queue limits
+ *    when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+	if (rq->nr_sectors > q->max_sectors ||
+	    rq->data_len > q->max_hw_sectors << 9) {
+		printk(KERN_ERR "%s: over max size limit.\n", __func__);
+		return -EIO;
+	}
+
+	/*
+	 * queue's settings related to segment counting like q->bounce_pfn
+	 * may differ from that of other stacking queues.
+	 * Recalculate it to check the request correctly on this queue's
+	 * limitation.
+	 */
+	blk_recalc_rq_segments(rq);
+	if (rq->nr_phys_segments > q->max_phys_segments ||
+	    rq->nr_phys_segments > q->max_hw_segments) {
+		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q:  the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+	unsigned long flags;
+
+	if (blk_rq_check_limits(q, rq))
+		return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+	    should_fail(&fail_make_request, blk_rq_bytes(rq)))
+		return -EIO;
+#endif
+
+	spin_lock_irqsave(q->queue_lock, flags);
+
+	/*
+	 * Submitting request must be dequeued before calling this function
+	 * because it will be linked to another request_queue
+	 */
+	BUG_ON(blk_queued_rq(rq));
+
+	drive_stat_acct(rq, 1);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+	spin_unlock_irqrestore(q->queue_lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
  * __end_that_request_first - end I/O on a request
  * @req:      the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
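
[Editor's note: a hypothetical request-stacking dispatch path built on the two helpers above might look like this sketch; example_dispatch_clone() is illustrative, and request-based dm is the intended real user.]

	/* Insert a cloned request on the lower device's queue;
	 * blk_insert_cloned_request() repeats the limit check internally. */
	static int example_dispatch_clone(struct request_queue *lower_q,
					  struct request *clone)
	{
		int ret = blk_insert_cloned_request(lower_q, clone);

		if (ret)
			printk(KERN_ERR "clone exceeds lower queue limits\n");
		return ret;
	}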
@@ -1527,8 +1624,8 @@ EXPORT_SYMBOL(submit_bio);
  *     for the next range of segments (if any) in the cluster.
  *
  * Return:
- *     0 - we are done with this request, call end_that_request_last()
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request, call end_that_request_last()
+ *     %1 - still buffers pending for this request
  **/
 static int __end_that_request_first(struct request *req, int error,
 				    int nr_bytes)
@@ -1539,7 +1636,7 @@ static int __end_that_request_first(struct request *req, int error,
 	blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
 
 	/*
-	 * for a REQ_BLOCK_PC request, we want to carry any eventual
+	 * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
 	 * sense key with us all the way through
 	 */
 	if (!blk_pc_request(req))
@@ -1552,11 +1649,14 @@ static int __end_that_request_first(struct request *req, int error,
 	}
 
 	if (blk_fs_request(req) && req->rq_disk) {
-		struct hd_struct *part = get_part(req->rq_disk, req->sector);
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
 
-		all_stat_add(req->rq_disk, part, sectors[rw],
-				nr_bytes >> 9, req->sector);
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
+		part_stat_unlock();
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1641,88 +1741,14 @@ static int __end_that_request_first(struct request *req, int error,
 }
 
 /*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
-	struct list_head *cpu_list, local_list;
-
-	local_irq_disable();
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_replace_init(cpu_list, &local_list);
-	local_irq_enable();
-
-	while (!list_empty(&local_list)) {
-		struct request *rq;
-
-		rq = list_entry(local_list.next, struct request, donelist);
-		list_del_init(&rq->donelist);
-		rq->q->softirq_done_fn(rq);
-	}
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
-				    unsigned long action, void *hcpu)
-{
-	/*
-	 * If a CPU goes away, splice its entries to the current CPU
-	 * and trigger a run of the softirq
-	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
-
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 &__get_cpu_var(blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
-	.notifier_call	= blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req:      the request being processed
- *
- * Description:
- *     Ends all I/O on a request. It does not handle partial completions,
- *     unless the driver actually implements this in its completion callback
- *     through requeueing. The actual completion happens out-of-order,
- *     through a softirq handler. The user must have registered a completion
- *     callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
-	struct list_head *cpu_list;
-	unsigned long flags;
-
-	BUG_ON(!req->q->softirq_done_fn);
-
-	local_irq_save(flags);
-
-	cpu_list = &__get_cpu_var(blk_cpu_done);
-	list_add_tail(&req->donelist, cpu_list);
-	raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
  * queue lock must be held
  */
 static void end_that_request_last(struct request *req, int error)
 {
 	struct gendisk *disk = req->rq_disk;
 
+	blk_delete_timer(req);
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1740,16 +1766,18 @@ static void end_that_request_last(struct request *req, int error)
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
-		struct hd_struct *part = get_part(disk, req->sector);
-
-		__all_stat_inc(disk, part, ios[rw], req->sector);
-		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
-		disk_round_stats(disk);
-		disk->in_flight--;
-		if (part) {
-			part_round_stats(part);
-			part->in_flight--;
-		}
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
 	}
 
 	if (req->end_io)
@@ -1762,17 +1790,6 @@ static void end_that_request_last(struct request *req, int error)
 	}
 }
 
-static inline void __end_request(struct request *rq, int uptodate,
-				 unsigned int nr_bytes)
-{
-	int error = 0;
-
-	if (uptodate <= 0)
-		error = uptodate ? uptodate : -EIO;
-
-	__blk_end_request(rq, error, nr_bytes);
-}
-
 /**
  * blk_rq_bytes - Returns bytes left to complete in the entire request
  * @rq: the request being processed
@@ -1803,74 +1820,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq)
 EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
 
 /**
- * end_queued_request - end all I/O on a queued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request, and removes it from the block layer queues.
- *     Not suitable for normal IO completion, unless the driver still has
- *     the request attached to the block layer.
- *
- **/
-void end_queued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_queued_request);
-
-/**
- * end_dequeued_request - end all I/O on a dequeued request
- * @rq:		the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
- *
- * Description:
- *     Ends all I/O on a request. The request must already have been
- *     dequeued using blkdev_dequeue_request(), as is normally the case
- *     for most drivers.
- *
- **/
-void end_dequeued_request(struct request *rq, int uptodate)
-{
-	__end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_dequeued_request);
-
-
-/**
  * end_request - end I/O on the current segment of the request
  * @req:	the request being processed
- * @uptodate:	error value or 0/1 uptodate flag
+ * @uptodate:	error value or %0/%1 uptodate flag
  *
  * Description:
  *     Ends I/O on the current segment of a request. If that is the only
  *     remaining segment, the request is also completed and freed.
  *
- *     This is a remnant of how older block drivers handled IO completions.
- *     Modern drivers typically end IO on the full request in one go, unless
+ *     This is a remnant of how older block drivers handled I/O completions.
+ *     Modern drivers typically end I/O on the full request in one go, unless
  *     they have a residual value to account for. For that case this function
  *     isn't really useful, unless the residual just happens to be the
- *     full current segment. In other words, don't use this function in new
- *     code. Either use end_request_completely(), or the
- *     end_that_request_chunk() (along with end_that_request_last()) for
- *     partial completions.
- *
+ *     full current segment. In other words, don't use this function in new
+ *     code. Use blk_end_request() or __blk_end_request() to end a request.
  **/
 void end_request(struct request *req, int uptodate)
 {
-	__end_request(req, uptodate, req->hard_cur_sectors << 9);
+	int error = 0;
+
+	if (uptodate <= 0)
+		error = uptodate ? uptodate : -EIO;
+
+	__blk_end_request(req, error, req->hard_cur_sectors << 9);
 }
 EXPORT_SYMBOL(end_request);
 
+static int end_that_request_data(struct request *rq, int error,
+				 unsigned int nr_bytes, unsigned int bidi_bytes)
+{
+	if (rq->bio) {
+		if (__end_that_request_first(rq, error, nr_bytes))
+			return 1;
+
+		/* Bidi request must be completed as a whole */
+		if (blk_bidi_rq(rq) &&
+		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
+			return 1;
+	}
+
+	return 0;
+}
+
 /**
  * blk_end_io - Generic end_io function to complete a request.
  * @rq:           the request being processed
- * @error:        0 for success, < 0 for error
+ * @error:        %0 for success, < %0 for error
  * @nr_bytes:     number of bytes to complete @rq
  * @bidi_bytes:   number of bytes to complete @rq->next_rq
  * @drv_callback: function called between completion of bios in the request
  *                and completion of the request.
- *                If the callback returns non 0, this helper returns without
+ *                If the callback returns non %0, this helper returns without
  *                completion of the request.
  *
  * Description:
@@ -1878,8 +1878,8 @@ EXPORT_SYMBOL(end_request);
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - this request is not freed yet, it still has pending buffers.
+ *     %0 - we are done with this request
+ *     %1 - this request is not freed yet, it still has pending buffers.
  **/
 static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 		      unsigned int bidi_bytes,
@@ -1888,15 +1888,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 	struct request_queue *q = rq->q;
 	unsigned long flags = 0UL;
 
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-
-		/* Bidi request must be completed as a whole */
-		if (blk_bidi_rq(rq) &&
-		    __end_that_request_first(rq->next_rq, error, bidi_bytes))
-			return 1;
-	}
+	if (end_that_request_data(rq, error, nr_bytes, bidi_bytes))
+		return 1;
 
 	/* Special feature for tricky drivers */
 	if (drv_callback && drv_callback(rq))
@@ -1914,7 +1907,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
 /**
  * blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
@@ -1922,8 +1915,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes,
  *     If @rq has leftover, sets it up for the next range of segments.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
@@ -1934,22 +1927,20 @@ EXPORT_SYMBOL_GPL(blk_end_request);
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq:       the request being processed
- * @error:    0 for success, < 0 for error
+ * @error:    %0 for success, < %0 for error
  * @nr_bytes: number of bytes to complete
  *
  * Description:
  *     Must be called with queue lock held unlike blk_end_request().
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
-	if (blk_fs_request(rq) || blk_pc_request(rq)) {
-		if (__end_that_request_first(rq, error, nr_bytes))
-			return 1;
-	}
+	if (rq->bio && __end_that_request_first(rq, error, nr_bytes))
+		return 1;
 
 	add_disk_randomness(rq->rq_disk);
 
@@ -1962,7 +1953,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
 /**
  * blk_end_bidi_request - Helper function for drivers to complete bidi request.
  * @rq:         the bidi request being processed
- * @error:      0 for success, < 0 for error
+ * @error:      %0 for success, < %0 for error
  * @nr_bytes:   number of bytes to complete @rq
  * @bidi_bytes: number of bytes to complete @rq->next_rq
  *
@@ -1970,8 +1961,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request);
  *     Ends I/O on a number of bytes attached to @rq and @rq->next_rq.
  *
  * Return:
- *     0 - we are done with this request
- *     1 - still buffers pending for this request
+ *     %0 - we are done with this request
+ *     %1 - still buffers pending for this request
  **/
 int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
 			 unsigned int bidi_bytes)
@@ -1981,13 +1972,43 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes,
1981EXPORT_SYMBOL_GPL(blk_end_bidi_request); 1972EXPORT_SYMBOL_GPL(blk_end_bidi_request);
1982 1973
1983/** 1974/**
1975 * blk_update_request - Special helper function for request stacking drivers
1976 * @rq: the request being processed
1977 * @error: %0 for success, < %0 for error
1978 * @nr_bytes: number of bytes to complete @rq
1979 *
1980 * Description:
1981 * Ends I/O on a number of bytes attached to @rq, but doesn't complete
1982 * the request structure even if @rq doesn't have leftover.
1983 * If @rq has leftover, sets it up for the next range of segments.
1984 *
1985 * This special helper function is only for request stacking drivers
1986 * (e.g. request-based dm) so that they can handle partial completion.
1987 * Actual device drivers should use blk_end_request instead.
1988 */
1989void blk_update_request(struct request *rq, int error, unsigned int nr_bytes)
1990{
1991 if (!end_that_request_data(rq, error, nr_bytes, 0)) {
1992 /*
1993 * These members are not updated in end_that_request_data()
1994 * when all bios are completed.
1995 * Update them so that the request stacking driver can find
1996 * how many bytes remain in the request later.
1997 */
1998 rq->nr_sectors = rq->hard_nr_sectors = 0;
1999 rq->current_nr_sectors = rq->hard_cur_sectors = 0;
2000 }
2001}
2002EXPORT_SYMBOL_GPL(blk_update_request);
2003
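To make the kernel-doc above concrete: a request stacking driver completes a clone on the lower device, then uses blk_update_request() to advance the original request without freeing it. A hedged sketch; the caller is assumed to pass in the byte count the lower device finished, and requeue_remainder() is an invented helper, not part of this commit:

static void stacked_complete(struct request *orig, int error,
			     unsigned int nr_bytes)
{
	/* advance the original past what the lower device finished */
	blk_update_request(orig, error, nr_bytes);

	/*
	 * blk_update_request() zeroes orig->nr_sectors once all bios
	 * are done, so the leftover check is cheap.
	 */
	if (!orig->nr_sectors)
		blk_end_request(orig, error, 0);
	else
		requeue_remainder(orig);	/* hypothetical */
}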
2004/**
1984 * blk_end_request_callback - Special helper function for tricky drivers 2005 * blk_end_request_callback - Special helper function for tricky drivers
1985 * @rq: the request being processed 2006 * @rq: the request being processed
1986 * @error: 0 for success, < 0 for error 2007 * @error: %0 for success, < %0 for error
1987 * @nr_bytes: number of bytes to complete 2008 * @nr_bytes: number of bytes to complete
1988 * @drv_callback: function called between completion of bios in the request 2009 * @drv_callback: function called between completion of bios in the request
1989 * and completion of the request. 2010 * and completion of the request.
1990 * If the callback returns non 0, this helper returns without 2011 * If the callback returns non %0, this helper returns without
1991 * completion of the request. 2012 * completion of the request.
1992 * 2013 *
1993 * Description: 2014 * Description:
@@ -2000,10 +2021,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request);
2000 * Don't use this interface in other places anymore. 2021 * Don't use this interface in other places anymore.
2001 * 2022 *
2002 * Return: 2023 * Return:
2003 * 0 - we are done with this request 2024 * %0 - we are done with this request
2004 * 1 - this request is not freed yet. 2025 * %1 - this request is not freed yet.
2005 * this request still has pending buffers or 2026 * this request still has pending buffers or
2006 * the driver doesn't want to finish this request yet. 2027 * the driver doesn't want to finish this request yet.
2007 **/ 2028 **/
2008int blk_end_request_callback(struct request *rq, int error, 2029int blk_end_request_callback(struct request *rq, int error,
2009 unsigned int nr_bytes, 2030 unsigned int nr_bytes,
@@ -2016,15 +2037,17 @@ EXPORT_SYMBOL_GPL(blk_end_request_callback);
2016void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2037void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2017 struct bio *bio) 2038 struct bio *bio)
2018{ 2039{
2019 /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ 2040 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
2041 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
2020 rq->cmd_flags |= (bio->bi_rw & 3); 2042 rq->cmd_flags |= (bio->bi_rw & 3);
2021 2043
2022 rq->nr_phys_segments = bio_phys_segments(q, bio); 2044 if (bio_has_data(bio)) {
2023 rq->nr_hw_segments = bio_hw_segments(q, bio); 2045 rq->nr_phys_segments = bio_phys_segments(q, bio);
2046 rq->buffer = bio_data(bio);
2047 }
2024 rq->current_nr_sectors = bio_cur_sectors(bio); 2048 rq->current_nr_sectors = bio_cur_sectors(bio);
2025 rq->hard_cur_sectors = rq->current_nr_sectors; 2049 rq->hard_cur_sectors = rq->current_nr_sectors;
2026 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); 2050 rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
2027 rq->buffer = bio_data(bio);
2028 rq->data_len = bio->bi_size; 2051 rq->data_len = bio->bi_size;
2029 2052
2030 rq->bio = rq->biotail = bio; 2053 rq->bio = rq->biotail = bio;
@@ -2033,7 +2056,35 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2033 rq->rq_disk = bio->bi_bdev->bd_disk; 2056 rq->rq_disk = bio->bi_bdev->bd_disk;
2034} 2057}
2035 2058
2036int kblockd_schedule_work(struct work_struct *work) 2059/**
2060 * blk_lld_busy - Check if underlying low-level drivers of a device are busy
2061 * @q : the queue of the device being checked
2062 *
2063 * Description:
2064 * Check if underlying low-level drivers of a device are busy.
 2065 * If a driver wants to export its busy state, it must first register
 2066 * its own exporting function using blk_queue_lld_busy().
2067 *
2068 * Basically, this function is used only by request stacking drivers
2069 * to stop dispatching requests to underlying devices when underlying
 2070 * devices are busy. This allows more I/O merging on the queue
 2071 * of the request stacking driver and prevents I/O throughput regressions
 2072 * under bursty I/O load.
2073 *
2074 * Return:
2075 * 0 - Not busy (The request stacking driver should dispatch request)
2076 * 1 - Busy (The request stacking driver should stop dispatching request)
2077 */
2078int blk_lld_busy(struct request_queue *q)
2079{
2080 if (q->lld_busy_fn)
2081 return q->lld_busy_fn(q);
2082
2083 return 0;
2084}
2085EXPORT_SYMBOL_GPL(blk_lld_busy);
2086
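Both halves of the interface in one hedged sketch: the struct my_dev fields are invented; only blk_queue_lld_busy() and blk_lld_busy() come from this commit.

#include <linux/blkdev.h>

struct my_dev {				/* hypothetical per-device state */
	struct request_queue *queue;
	unsigned int in_flight;
	unsigned int can_queue;
};

/* LLD side: returning 1 tells stacking drivers to stop dispatching */
static int my_lld_busy(struct request_queue *q)
{
	struct my_dev *dev = q->queuedata;

	return dev->in_flight >= dev->can_queue;
}

static void my_dev_init_queue(struct my_dev *dev)
{
	blk_queue_lld_busy(dev->queue, my_lld_busy);
}

/* stacking-driver side: check before pushing a request downwards */
static int lower_can_take_io(struct request_queue *lower_q)
{
	return !blk_lld_busy(lower_q);
}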
2087int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
2037{ 2088{
2038 return queue_work(kblockd_workqueue, work); 2089 return queue_work(kblockd_workqueue, work);
2039} 2090}
@@ -2047,8 +2098,6 @@ EXPORT_SYMBOL(kblockd_flush_work);
2047 2098
2048int __init blk_dev_init(void) 2099int __init blk_dev_init(void)
2049{ 2100{
2050 int i;
2051
2052 kblockd_workqueue = create_workqueue("kblockd"); 2101 kblockd_workqueue = create_workqueue("kblockd");
2053 if (!kblockd_workqueue) 2102 if (!kblockd_workqueue)
2054 panic("Failed to create kblockd\n"); 2103 panic("Failed to create kblockd\n");
@@ -2059,12 +2108,6 @@ int __init blk_dev_init(void)
2059 blk_requestq_cachep = kmem_cache_create("blkdev_queue", 2108 blk_requestq_cachep = kmem_cache_create("blkdev_queue",
2060 sizeof(struct request_queue), 0, SLAB_PANIC, NULL); 2109 sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
2061 2110
2062 for_each_possible_cpu(i)
2063 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
2064
2065 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
2066 register_hotcpu_notifier(&blk_cpu_notifier);
2067
2068 return 0; 2111 return 0;
2069} 2112}
2070 2113
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 9bceff7674f2..6af716d1e54e 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -16,7 +16,7 @@
16/** 16/**
17 * blk_end_sync_rq - executes a completion event on a request 17 * blk_end_sync_rq - executes a completion event on a request
18 * @rq: request to complete 18 * @rq: request to complete
19 * @error: end io status of the request 19 * @error: end I/O status of the request
20 */ 20 */
21static void blk_end_sync_rq(struct request *rq, int error) 21static void blk_end_sync_rq(struct request *rq, int error)
22{ 22{
@@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error)
41 * @done: I/O completion handler 41 * @done: I/O completion handler
42 * 42 *
43 * Description: 43 * Description:
44 * Insert a fully prepared request at the back of the io scheduler queue 44 * Insert a fully prepared request at the back of the I/O scheduler queue
45 * for execution. Don't wait for completion. 45 * for execution. Don't wait for completion.
46 */ 46 */
47void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, 47void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
@@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
72 * @at_head: insert request at head or tail of queue 72 * @at_head: insert request at head or tail of queue
73 * 73 *
74 * Description: 74 * Description:
75 * Insert a fully prepared request at the back of the io scheduler queue 75 * Insert a fully prepared request at the back of the I/O scheduler queue
76 * for execution and wait for completion. 76 * for execution and wait for completion.
77 */ 77 */
78int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, 78int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk,
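As a usage sketch of the two execute helpers documented above: a driver builds a REQ_TYPE_BLOCK_PC request and lets blk_execute_rq() insert it and sleep until completion. The TEST UNIT READY choice and the 10 second timeout are illustrative only:

#include <linux/blkdev.h>
#include <scsi/scsi.h>

static int send_tur(struct request_queue *q, struct gendisk *disk)
{
	struct request *rq;
	int err;

	rq = blk_get_request(q, READ, __GFP_WAIT);
	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd[0] = TEST_UNIT_READY;
	rq->cmd_len = 6;
	rq->timeout = 10 * HZ;

	/* inserted at the back of the I/O scheduler queue, then we sleep */
	err = blk_execute_rq(q, disk, rq, 0);

	blk_put_request(rq);
	return err;
}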
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 3f1a8478cc38..61a8e2f8fdd0 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -108,51 +108,51 @@ new_segment:
108EXPORT_SYMBOL(blk_rq_map_integrity_sg); 108EXPORT_SYMBOL(blk_rq_map_integrity_sg);
109 109
110/** 110/**
111 * blk_integrity_compare - Compare integrity profile of two block devices 111 * blk_integrity_compare - Compare integrity profile of two disks
112 * @b1: Device to compare 112 * @gd1: Disk to compare
113 * @b2: Device to compare 113 * @gd2: Disk to compare
114 * 114 *
115 * Description: Meta-devices like DM and MD need to verify that all 115 * Description: Meta-devices like DM and MD need to verify that all
116 * sub-devices use the same integrity format before advertising to 116 * sub-devices use the same integrity format before advertising to
117 * upper layers that they can send/receive integrity metadata. This 117 * upper layers that they can send/receive integrity metadata. This
118 * function can be used to check whether two block devices have 118 * function can be used to check whether two gendisk devices have
119 * compatible integrity formats. 119 * compatible integrity formats.
120 */ 120 */
121int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2) 121int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2)
122{ 122{
123 struct blk_integrity *b1 = bd1->bd_disk->integrity; 123 struct blk_integrity *b1 = gd1->integrity;
124 struct blk_integrity *b2 = bd2->bd_disk->integrity; 124 struct blk_integrity *b2 = gd2->integrity;
125 125
126 BUG_ON(bd1->bd_disk == NULL); 126 if (!b1 && !b2)
127 BUG_ON(bd2->bd_disk == NULL); 127 return 0;
128 128
129 if (!b1 || !b2) 129 if (!b1 || !b2)
130 return 0; 130 return -1;
131 131
132 if (b1->sector_size != b2->sector_size) { 132 if (b1->sector_size != b2->sector_size) {
133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, 133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
134 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 134 gd1->disk_name, gd2->disk_name,
135 b1->sector_size, b2->sector_size); 135 b1->sector_size, b2->sector_size);
136 return -1; 136 return -1;
137 } 137 }
138 138
139 if (b1->tuple_size != b2->tuple_size) { 139 if (b1->tuple_size != b2->tuple_size) {
140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, 140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
141 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 141 gd1->disk_name, gd2->disk_name,
142 b1->tuple_size, b2->tuple_size); 142 b1->tuple_size, b2->tuple_size);
143 return -1; 143 return -1;
144 } 144 }
145 145
146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { 146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, 147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
148 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 148 gd1->disk_name, gd2->disk_name,
149 b1->tag_size, b2->tag_size); 149 b1->tag_size, b2->tag_size);
150 return -1; 150 return -1;
151 } 151 }
152 152
153 if (strcmp(b1->name, b2->name)) { 153 if (strcmp(b1->name, b2->name)) {
154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, 154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
155 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, 155 gd1->disk_name, gd2->disk_name,
156 b1->name, b2->name); 156 b1->name, b2->name);
157 return -1; 157 return -1;
158 } 158 }
@@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
331 return -1; 331 return -1;
332 332
333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype, 333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
334 &disk->dev.kobj, "%s", "integrity")) { 334 &disk_to_dev(disk)->kobj,
335 "%s", "integrity")) {
335 kmem_cache_free(integrity_cachep, bi); 336 kmem_cache_free(integrity_cachep, bi);
336 return -1; 337 return -1;
337 } 338 }
@@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk)
375 376
376 kobject_uevent(&bi->kobj, KOBJ_REMOVE); 377 kobject_uevent(&bi->kobj, KOBJ_REMOVE);
377 kobject_del(&bi->kobj); 378 kobject_del(&bi->kobj);
378 kobject_put(&disk->dev.kobj);
379 kmem_cache_free(integrity_cachep, bi); 379 kmem_cache_free(integrity_cachep, bi);
380 disk->integrity = NULL;
380} 381}
381EXPORT_SYMBOL(blk_integrity_unregister); 382EXPORT_SYMBOL(blk_integrity_unregister);
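A hedged sketch of the MD/DM-style consumer the kernel-doc describes, now working on gendisks rather than block_devices; meta_register_integrity() and its arguments are invented for illustration:

static int meta_register_integrity(struct gendisk *meta,
				   struct gendisk **member, int nr)
{
	int i;

	for (i = 1; i < nr; i++)
		if (blk_integrity_compare(member[0], member[i]) < 0)
			return -EINVAL;		/* mixed or missing profiles */

	if (!member[0]->integrity)		/* no member has a profile */
		return 0;

	/* all members agree; mirror the shared profile on the meta-device */
	return blk_integrity_register(meta, member[0]->integrity);
}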
diff --git a/block/blk-map.c b/block/blk-map.c
index af37e4ae62f5..4849fa36161e 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio)
41} 41}
42 42
43static int __blk_rq_map_user(struct request_queue *q, struct request *rq, 43static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
44 void __user *ubuf, unsigned int len) 44 struct rq_map_data *map_data, void __user *ubuf,
45 unsigned int len, int null_mapped, gfp_t gfp_mask)
45{ 46{
46 unsigned long uaddr; 47 unsigned long uaddr;
47 unsigned int alignment;
48 struct bio *bio, *orig_bio; 48 struct bio *bio, *orig_bio;
49 int reading, ret; 49 int reading, ret;
50 50
@@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
55 * direct dma. else, set up kernel bounce buffers 55 * direct dma. else, set up kernel bounce buffers
56 */ 56 */
57 uaddr = (unsigned long) ubuf; 57 uaddr = (unsigned long) ubuf;
58 alignment = queue_dma_alignment(q) | q->dma_pad_mask; 58 if (blk_rq_aligned(q, ubuf, len) && !map_data)
59 if (!(uaddr & alignment) && !(len & alignment)) 59 bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask);
60 bio = bio_map_user(q, NULL, uaddr, len, reading);
61 else 60 else
62 bio = bio_copy_user(q, uaddr, len, reading); 61 bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask);
63 62
64 if (IS_ERR(bio)) 63 if (IS_ERR(bio))
65 return PTR_ERR(bio); 64 return PTR_ERR(bio);
66 65
66 if (null_mapped)
67 bio->bi_flags |= (1 << BIO_NULL_MAPPED);
68
67 orig_bio = bio; 69 orig_bio = bio;
68 blk_queue_bounce(q, &bio); 70 blk_queue_bounce(q, &bio);
69 71
@@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
85} 87}
86 88
87/** 89/**
88 * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage 90 * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage
89 * @q: request queue where request should be inserted 91 * @q: request queue where request should be inserted
90 * @rq: request structure to fill 92 * @rq: request structure to fill
93 * @map_data: pointer to the rq_map_data holding pages (if necessary)
91 * @ubuf: the user buffer 94 * @ubuf: the user buffer
92 * @len: length of user data 95 * @len: length of user data
96 * @gfp_mask: memory allocation flags
93 * 97 *
94 * Description: 98 * Description:
95 * Data will be mapped directly for zero copy io, if possible. Otherwise 99 * Data will be mapped directly for zero copy I/O, if possible. Otherwise
96 * a kernel bounce buffer is used. 100 * a kernel bounce buffer is used.
97 * 101 *
98 * A matching blk_rq_unmap_user() must be issued at the end of io, while 102 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
99 * still in process context. 103 * still in process context.
100 * 104 *
101 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 105 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
105 * unmapping. 109 * unmapping.
106 */ 110 */
107int blk_rq_map_user(struct request_queue *q, struct request *rq, 111int blk_rq_map_user(struct request_queue *q, struct request *rq,
108 void __user *ubuf, unsigned long len) 112 struct rq_map_data *map_data, void __user *ubuf,
113 unsigned long len, gfp_t gfp_mask)
109{ 114{
110 unsigned long bytes_read = 0; 115 unsigned long bytes_read = 0;
111 struct bio *bio = NULL; 116 struct bio *bio = NULL;
112 int ret; 117 int ret, null_mapped = 0;
113 118
114 if (len > (q->max_hw_sectors << 9)) 119 if (len > (q->max_hw_sectors << 9))
115 return -EINVAL; 120 return -EINVAL;
116 if (!len || !ubuf) 121 if (!len)
117 return -EINVAL; 122 return -EINVAL;
123 if (!ubuf) {
124 if (!map_data || rq_data_dir(rq) != READ)
125 return -EINVAL;
126 null_mapped = 1;
127 }
118 128
119 while (bytes_read != len) { 129 while (bytes_read != len) {
120 unsigned long map_len, end, start; 130 unsigned long map_len, end, start;
@@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
132 if (end - start > BIO_MAX_PAGES) 142 if (end - start > BIO_MAX_PAGES)
133 map_len -= PAGE_SIZE; 143 map_len -= PAGE_SIZE;
134 144
135 ret = __blk_rq_map_user(q, rq, ubuf, map_len); 145 ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len,
146 null_mapped, gfp_mask);
136 if (ret < 0) 147 if (ret < 0)
137 goto unmap_rq; 148 goto unmap_rq;
138 if (!bio) 149 if (!bio)
@@ -154,18 +165,20 @@ unmap_rq:
154EXPORT_SYMBOL(blk_rq_map_user); 165EXPORT_SYMBOL(blk_rq_map_user);
155 166
156/** 167/**
157 * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage 168 * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
158 * @q: request queue where request should be inserted 169 * @q: request queue where request should be inserted
159 * @rq: request to map data to 170 * @rq: request to map data to
171 * @map_data: pointer to the rq_map_data holding pages (if necessary)
160 * @iov: pointer to the iovec 172 * @iov: pointer to the iovec
161 * @iov_count: number of elements in the iovec 173 * @iov_count: number of elements in the iovec
162 * @len: I/O byte count 174 * @len: I/O byte count
175 * @gfp_mask: memory allocation flags
163 * 176 *
164 * Description: 177 * Description:
165 * Data will be mapped directly for zero copy io, if possible. Otherwise 178 * Data will be mapped directly for zero copy I/O, if possible. Otherwise
166 * a kernel bounce buffer is used. 179 * a kernel bounce buffer is used.
167 * 180 *
168 * A matching blk_rq_unmap_user() must be issued at the end of io, while 181 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while
169 * still in process context. 182 * still in process context.
170 * 183 *
171 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 184 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
@@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user);
175 * unmapping. 188 * unmapping.
176 */ 189 */
177int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 190int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
178 struct sg_iovec *iov, int iov_count, unsigned int len) 191 struct rq_map_data *map_data, struct sg_iovec *iov,
192 int iov_count, unsigned int len, gfp_t gfp_mask)
179{ 193{
180 struct bio *bio; 194 struct bio *bio;
181 int i, read = rq_data_dir(rq) == READ; 195 int i, read = rq_data_dir(rq) == READ;
@@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
193 } 207 }
194 } 208 }
195 209
196 if (unaligned || (q->dma_pad_mask & len)) 210 if (unaligned || (q->dma_pad_mask & len) || map_data)
197 bio = bio_copy_user_iov(q, iov, iov_count, read); 211 bio = bio_copy_user_iov(q, map_data, iov, iov_count, read,
212 gfp_mask);
198 else 213 else
199 bio = bio_map_user_iov(q, NULL, iov, iov_count, read); 214 bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask);
200 215
201 if (IS_ERR(bio)) 216 if (IS_ERR(bio))
202 return PTR_ERR(bio); 217 return PTR_ERR(bio);
@@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
216 rq->buffer = rq->data = NULL; 231 rq->buffer = rq->data = NULL;
217 return 0; 232 return 0;
218} 233}
234EXPORT_SYMBOL(blk_rq_map_user_iov);
219 235
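Pulling the new signature together: a typical REQ_TYPE_BLOCK_PC ioctl path passes a NULL @map_data when it has no pre-allocated pages, and GFP_KERNEL from process context. A hedged sketch with the command setup elided:

static int issue_user_cmd(struct request_queue *q, struct gendisk *disk,
			  void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int err;

	rq = blk_get_request(q, READ, __GFP_WAIT);
	if (!rq)
		return -ENOMEM;
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	/* ... fill rq->cmd[], rq->cmd_len and rq->timeout here ... */

	err = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (err)
		goto out;

	bio = rq->bio;	/* completion may change rq->bio; keep the original */
	err = blk_execute_rq(q, disk, rq, 0);
	blk_rq_unmap_user(bio);
out:
	blk_put_request(rq);
	return err;
}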
220/** 236/**
221 * blk_rq_unmap_user - unmap a request with user data 237 * blk_rq_unmap_user - unmap a request with user data
@@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
224 * Description: 240 * Description:
225 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 241 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must
226 * supply the original rq->bio from the blk_rq_map_user() return, since 242 * supply the original rq->bio from the blk_rq_map_user() return, since
227 * the io completion may have changed rq->bio. 243 * the I/O completion may have changed rq->bio.
228 */ 244 */
229int blk_rq_unmap_user(struct bio *bio) 245int blk_rq_unmap_user(struct bio *bio)
230{ 246{
@@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio)
250EXPORT_SYMBOL(blk_rq_unmap_user); 266EXPORT_SYMBOL(blk_rq_unmap_user);
251 267
252/** 268/**
253 * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage 269 * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage
254 * @q: request queue where request should be inserted 270 * @q: request queue where request should be inserted
255 * @rq: request to fill 271 * @rq: request to fill
256 * @kbuf: the kernel buffer 272 * @kbuf: the kernel buffer
@@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
264int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 280int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
265 unsigned int len, gfp_t gfp_mask) 281 unsigned int len, gfp_t gfp_mask)
266{ 282{
267 unsigned long kaddr;
268 unsigned int alignment;
269 int reading = rq_data_dir(rq) == READ; 283 int reading = rq_data_dir(rq) == READ;
270 int do_copy = 0; 284 int do_copy = 0;
271 struct bio *bio; 285 struct bio *bio;
@@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
275 if (!len || !kbuf) 289 if (!len || !kbuf)
276 return -EINVAL; 290 return -EINVAL;
277 291
278 kaddr = (unsigned long)kbuf; 292 do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf);
279 alignment = queue_dma_alignment(q) | q->dma_pad_mask;
280 do_copy = ((kaddr & alignment) || (len & alignment) ||
281 object_is_on_stack(kbuf));
282
283 if (do_copy) 293 if (do_copy)
284 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 294 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
285 else 295 else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5efc9e7a68b7..908d3e11ac52 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,7 +11,7 @@
11 11
12void blk_recalc_rq_sectors(struct request *rq, int nsect) 12void blk_recalc_rq_sectors(struct request *rq, int nsect)
13{ 13{
14 if (blk_fs_request(rq)) { 14 if (blk_fs_request(rq) || blk_discard_rq(rq)) {
15 rq->hard_sector += nsect; 15 rq->hard_sector += nsect;
16 rq->hard_nr_sectors -= nsect; 16 rq->hard_nr_sectors -= nsect;
17 17
@@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
41void blk_recalc_rq_segments(struct request *rq) 41void blk_recalc_rq_segments(struct request *rq)
42{ 42{
43 int nr_phys_segs; 43 int nr_phys_segs;
44 int nr_hw_segs;
45 unsigned int phys_size; 44 unsigned int phys_size;
46 unsigned int hw_size;
47 struct bio_vec *bv, *bvprv = NULL; 45 struct bio_vec *bv, *bvprv = NULL;
48 int seg_size; 46 int seg_size;
49 int hw_seg_size;
50 int cluster; 47 int cluster;
51 struct req_iterator iter; 48 struct req_iterator iter;
52 int high, highprv = 1; 49 int high, highprv = 1;
@@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq)
56 return; 53 return;
57 54
58 cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 55 cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
59 hw_seg_size = seg_size = 0; 56 seg_size = 0;
60 phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; 57 phys_size = nr_phys_segs = 0;
61 rq_for_each_segment(bv, rq, iter) { 58 rq_for_each_segment(bv, rq, iter) {
62 /* 59 /*
63 * the trick here is making sure that a high page is never 60 * the trick here is making sure that a high page is never
@@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq)
66 */ 63 */
67 high = page_to_pfn(bv->bv_page) > q->bounce_pfn; 64 high = page_to_pfn(bv->bv_page) > q->bounce_pfn;
68 if (high || highprv) 65 if (high || highprv)
69 goto new_hw_segment; 66 goto new_segment;
70 if (cluster) { 67 if (cluster) {
71 if (seg_size + bv->bv_len > q->max_segment_size) 68 if (seg_size + bv->bv_len > q->max_segment_size)
72 goto new_segment; 69 goto new_segment;
@@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq)
74 goto new_segment; 71 goto new_segment;
75 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) 72 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
76 goto new_segment; 73 goto new_segment;
77 if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
78 goto new_hw_segment;
79 74
80 seg_size += bv->bv_len; 75 seg_size += bv->bv_len;
81 hw_seg_size += bv->bv_len;
82 bvprv = bv; 76 bvprv = bv;
83 continue; 77 continue;
84 } 78 }
85new_segment: 79new_segment:
86 if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
87 !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
88 hw_seg_size += bv->bv_len;
89 else {
90new_hw_segment:
91 if (nr_hw_segs == 1 &&
92 hw_seg_size > rq->bio->bi_hw_front_size)
93 rq->bio->bi_hw_front_size = hw_seg_size;
94 hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
95 nr_hw_segs++;
96 }
97
98 nr_phys_segs++; 80 nr_phys_segs++;
99 bvprv = bv; 81 bvprv = bv;
100 seg_size = bv->bv_len; 82 seg_size = bv->bv_len;
101 highprv = high; 83 highprv = high;
102 } 84 }
103 85
104 if (nr_hw_segs == 1 &&
105 hw_seg_size > rq->bio->bi_hw_front_size)
106 rq->bio->bi_hw_front_size = hw_seg_size;
107 if (hw_seg_size > rq->biotail->bi_hw_back_size)
108 rq->biotail->bi_hw_back_size = hw_seg_size;
109 rq->nr_phys_segments = nr_phys_segs; 86 rq->nr_phys_segments = nr_phys_segs;
110 rq->nr_hw_segments = nr_hw_segs;
111} 87}
112 88
113void blk_recount_segments(struct request_queue *q, struct bio *bio) 89void blk_recount_segments(struct request_queue *q, struct bio *bio)
@@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio)
120 blk_recalc_rq_segments(&rq); 96 blk_recalc_rq_segments(&rq);
121 bio->bi_next = nxt; 97 bio->bi_next = nxt;
122 bio->bi_phys_segments = rq.nr_phys_segments; 98 bio->bi_phys_segments = rq.nr_phys_segments;
123 bio->bi_hw_segments = rq.nr_hw_segments;
124 bio->bi_flags |= (1 << BIO_SEG_VALID); 99 bio->bi_flags |= (1 << BIO_SEG_VALID);
125} 100}
126EXPORT_SYMBOL(blk_recount_segments); 101EXPORT_SYMBOL(blk_recount_segments);
@@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
131 if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) 106 if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
132 return 0; 107 return 0;
133 108
134 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
135 return 0;
136 if (bio->bi_size + nxt->bi_size > q->max_segment_size) 109 if (bio->bi_size + nxt->bi_size > q->max_segment_size)
137 return 0; 110 return 0;
138 111
112 if (!bio_has_data(bio))
113 return 1;
114
115 if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
116 return 0;
117
139 /* 118 /*
140 * bio and nxt are contigous in memory, check if the queue allows 119 * bio and nxt are contiguous in memory; check if the queue allows
141 * these two to be merged into one 120 * these two to be merged into one
142 */ 121 */
143 if (BIO_SEG_BOUNDARY(q, bio, nxt)) 122 if (BIO_SEG_BOUNDARY(q, bio, nxt))
@@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
146 return 0; 125 return 0;
147} 126}
148 127
149static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
150 struct bio *nxt)
151{
152 if (!bio_flagged(bio, BIO_SEG_VALID))
153 blk_recount_segments(q, bio);
154 if (!bio_flagged(nxt, BIO_SEG_VALID))
155 blk_recount_segments(q, nxt);
156 if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
157 BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
158 return 0;
159 if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size)
160 return 0;
161
162 return 1;
163}
164
165/* 128/*
166 * map a request to scatterlist, return number of sg entries setup. Caller 129 * map a request to scatterlist, return number of sg entries setup. Caller
167 * must make sure sg can hold rq->nr_phys_segments entries 130 * must make sure sg can hold rq->nr_phys_segments entries
@@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q,
275 struct request *req, 238 struct request *req,
276 struct bio *bio) 239 struct bio *bio)
277{ 240{
278 int nr_hw_segs = bio_hw_segments(q, bio);
279 int nr_phys_segs = bio_phys_segments(q, bio); 241 int nr_phys_segs = bio_phys_segments(q, bio);
280 242
281 if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments 243 if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments
282 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { 244 || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
283 req->cmd_flags |= REQ_NOMERGE; 245 req->cmd_flags |= REQ_NOMERGE;
284 if (req == q->last_merge) 246 if (req == q->last_merge)
@@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q,
290 * This will form the start of a new hw segment. Bump both 252 * This will form the start of a new hw segment. Bump both
291 * counters. 253 * counters.
292 */ 254 */
293 req->nr_hw_segments += nr_hw_segs;
294 req->nr_phys_segments += nr_phys_segs; 255 req->nr_phys_segments += nr_phys_segs;
295 return 1; 256 return 1;
296} 257}
@@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
299 struct bio *bio) 260 struct bio *bio)
300{ 261{
301 unsigned short max_sectors; 262 unsigned short max_sectors;
302 int len;
303 263
304 if (unlikely(blk_pc_request(req))) 264 if (unlikely(blk_pc_request(req)))
305 max_sectors = q->max_hw_sectors; 265 max_sectors = q->max_hw_sectors;
@@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
316 blk_recount_segments(q, req->biotail); 276 blk_recount_segments(q, req->biotail);
317 if (!bio_flagged(bio, BIO_SEG_VALID)) 277 if (!bio_flagged(bio, BIO_SEG_VALID))
318 blk_recount_segments(q, bio); 278 blk_recount_segments(q, bio);
319 len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
320 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
321 && !BIOVEC_VIRT_OVERSIZE(len)) {
322 int mergeable = ll_new_mergeable(q, req, bio);
323
324 if (mergeable) {
325 if (req->nr_hw_segments == 1)
326 req->bio->bi_hw_front_size = len;
327 if (bio->bi_hw_segments == 1)
328 bio->bi_hw_back_size = len;
329 }
330 return mergeable;
331 }
332 279
333 return ll_new_hw_segment(q, req, bio); 280 return ll_new_hw_segment(q, req, bio);
334} 281}
@@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
337 struct bio *bio) 284 struct bio *bio)
338{ 285{
339 unsigned short max_sectors; 286 unsigned short max_sectors;
340 int len;
341 287
342 if (unlikely(blk_pc_request(req))) 288 if (unlikely(blk_pc_request(req)))
343 max_sectors = q->max_hw_sectors; 289 max_sectors = q->max_hw_sectors;
@@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
351 q->last_merge = NULL; 297 q->last_merge = NULL;
352 return 0; 298 return 0;
353 } 299 }
354 len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
355 if (!bio_flagged(bio, BIO_SEG_VALID)) 300 if (!bio_flagged(bio, BIO_SEG_VALID))
356 blk_recount_segments(q, bio); 301 blk_recount_segments(q, bio);
357 if (!bio_flagged(req->bio, BIO_SEG_VALID)) 302 if (!bio_flagged(req->bio, BIO_SEG_VALID))
358 blk_recount_segments(q, req->bio); 303 blk_recount_segments(q, req->bio);
359 if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
360 !BIOVEC_VIRT_OVERSIZE(len)) {
361 int mergeable = ll_new_mergeable(q, req, bio);
362
363 if (mergeable) {
364 if (bio->bi_hw_segments == 1)
365 bio->bi_hw_front_size = len;
366 if (req->nr_hw_segments == 1)
367 req->biotail->bi_hw_back_size = len;
368 }
369 return mergeable;
370 }
371 304
372 return ll_new_hw_segment(q, req, bio); 305 return ll_new_hw_segment(q, req, bio);
373} 306}
@@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
376 struct request *next) 309 struct request *next)
377{ 310{
378 int total_phys_segments; 311 int total_phys_segments;
379 int total_hw_segments;
380 312
381 /* 313 /*
 382 * First check if either of the requests has been re-queued 314 * First check if either of the requests has been re-queued
@@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
398 if (total_phys_segments > q->max_phys_segments) 330 if (total_phys_segments > q->max_phys_segments)
399 return 0; 331 return 0;
400 332
401 total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; 333 if (total_phys_segments > q->max_hw_segments)
402 if (blk_hw_contig_segment(q, req->biotail, next->bio)) {
403 int len = req->biotail->bi_hw_back_size +
404 next->bio->bi_hw_front_size;
405 /*
406 * propagate the combined length to the end of the requests
407 */
408 if (req->nr_hw_segments == 1)
409 req->bio->bi_hw_front_size = len;
410 if (next->nr_hw_segments == 1)
411 next->biotail->bi_hw_back_size = len;
412 total_hw_segments--;
413 }
414
415 if (total_hw_segments > q->max_hw_segments)
416 return 0; 334 return 0;
417 335
418 /* Merge is OK... */ 336 /* Merge is OK... */
419 req->nr_phys_segments = total_phys_segments; 337 req->nr_phys_segments = total_phys_segments;
420 req->nr_hw_segments = total_hw_segments;
421 return 1; 338 return 1;
422} 339}
423 340
@@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req,
470 elv_merge_requests(q, req, next); 387 elv_merge_requests(q, req, next);
471 388
472 if (req->rq_disk) { 389 if (req->rq_disk) {
473 struct hd_struct *part 390 struct hd_struct *part;
474 = get_part(req->rq_disk, req->sector); 391 int cpu;
475 disk_round_stats(req->rq_disk); 392
476 req->rq_disk->in_flight--; 393 cpu = part_stat_lock();
477 if (part) { 394 part = disk_map_sector_rcu(req->rq_disk, req->sector);
478 part_round_stats(part); 395
479 part->in_flight--; 396 part_round_stats(cpu, part);
480 } 397 part_dec_in_flight(part);
398
399 part_stat_unlock();
481 } 400 }
482 401
483 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 402 req->ioprio = ioprio_best(req->ioprio, next->ioprio);
403 if (blk_rq_cpu_valid(next))
404 req->cpu = next->cpu;
484 405
485 __blk_put_request(q, next); 406 __blk_put_request(q, next);
486 return 1; 407 return 1;
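The attempt_merge() hunk above shows the new pattern for touching per-partition statistics: look the partition up under part_stat_lock(), use the returned CPU for the stat macros, then unlock. The same dance, extracted as a hedged stand-alone helper:

static void account_io_done(struct request *rq)
{
	struct hd_struct *part;
	int cpu;

	cpu = part_stat_lock();		/* pins us to a CPU for the stats */
	part = disk_map_sector_rcu(rq->rq_disk, rq->sector);

	part_round_stats(cpu, part);
	part_dec_in_flight(part);

	part_stat_unlock();
}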
diff --git a/block/blk-settings.c b/block/blk-settings.c
index dfc77012843f..b21dcdb64151 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
33EXPORT_SYMBOL(blk_queue_prep_rq); 33EXPORT_SYMBOL(blk_queue_prep_rq);
34 34
35/** 35/**
36 * blk_queue_set_discard - set a discard_sectors function for queue
37 * @q: queue
38 * @dfn: prepare_discard function
39 *
40 * It's possible for a queue to register a discard callback which is used
41 * to transform a discard request into the appropriate type for the
42 * hardware. If none is registered, then discard requests are failed
43 * with %EOPNOTSUPP.
44 *
45 */
46void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
47{
48 q->prepare_discard_fn = dfn;
49}
50EXPORT_SYMBOL(blk_queue_set_discard);
51
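What a registered callback might look like: the payload translation below is entirely hypothetical, and the return-0-on-success convention is an assumption by analogy with the other prep hooks, not something this hunk specifies.

static int my_prepare_discard(struct request_queue *q, struct request *rq)
{
	/* translate the discard into a driver-private command (made up) */
	rq->special = build_trim_payload(rq);	/* hypothetical helper */
	if (!rq->special)
		return -ENOMEM;

	return 0;
}

static void my_setup_discard(struct request_queue *q)
{
	blk_queue_set_discard(q, my_prepare_discard);
}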
52/**
36 * blk_queue_merge_bvec - set a merge_bvec function for queue 53 * blk_queue_merge_bvec - set a merge_bvec function for queue
37 * @q: queue 54 * @q: queue
38 * @mbfn: merge_bvec_fn 55 * @mbfn: merge_bvec_fn
@@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn)
60} 77}
61EXPORT_SYMBOL(blk_queue_softirq_done); 78EXPORT_SYMBOL(blk_queue_softirq_done);
62 79
80void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout)
81{
82 q->rq_timeout = timeout;
83}
84EXPORT_SYMBOL_GPL(blk_queue_rq_timeout);
85
86void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn)
87{
88 q->rq_timed_out_fn = fn;
89}
90EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out);
91
92void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn)
93{
94 q->lld_busy_fn = fn;
95}
96EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
97
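These setters plug into the new blk-timeout machinery added elsewhere in this merge; a sketch of how a driver might wire them up at probe time. The 30 second value and the retry policy are illustrative, not mandated anywhere:

static enum blk_eh_timer_return my_timed_out(struct request *rq)
{
	if (rq->retries++ < 3)			/* illustrative policy */
		return BLK_EH_RESET_TIMER;	/* re-arm and keep waiting */

	return BLK_EH_NOT_HANDLED;	/* leave it to the driver's own EH */
}

static void my_setup_queue_hooks(struct request_queue *q)
{
	blk_queue_rq_timeout(q, 30 * HZ);
	blk_queue_rq_timed_out(q, my_timed_out);
}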
63/** 98/**
64 * blk_queue_make_request - define an alternate make_request function for a device 99 * blk_queue_make_request - define an alternate make_request function for a device
65 * @q: the request queue for the device to be affected 100 * @q: the request queue for the device to be affected
@@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request);
127 * Different hardware can have different requirements as to what pages 162 * Different hardware can have different requirements as to what pages
128 * it can do I/O directly to. A low level driver can call 163 * it can do I/O directly to. A low level driver can call
129 * blk_queue_bounce_limit to have lower memory pages allocated as bounce 164 * blk_queue_bounce_limit to have lower memory pages allocated as bounce
130 * buffers for doing I/O to pages residing above @page. 165 * buffers for doing I/O to pages residing above @dma_addr.
131 **/ 166 **/
132void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) 167void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr)
133{ 168{
@@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments);
212 * Description: 247 * Description:
213 * Enables a low level driver to set an upper limit on the number of 248 * Enables a low level driver to set an upper limit on the number of
214 * hw data segments in a request. This would be the largest number of 249 * hw data segments in a request. This would be the largest number of
215 * address/length pairs the host adapter can actually give as once 250 * address/length pairs the host adapter can actually give at once
216 * to the device. 251 * to the device.
217 **/ 252 **/
218void blk_queue_max_hw_segments(struct request_queue *q, 253void blk_queue_max_hw_segments(struct request_queue *q,
@@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary);
393 * @mask: alignment mask 428 * @mask: alignment mask
394 * 429 *
395 * description: 430 * description:
396 * set required memory and length aligment for direct dma transactions. 431 * set required memory and length alignment for direct dma transactions.
 397 * this is used when building direct io requests for the queue. 432 * this is used when building direct io requests for the queue.
398 * 433 *
399 **/ 434 **/
@@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment);
409 * @mask: alignment mask 444 * @mask: alignment mask
410 * 445 *
411 * description: 446 * description:
412 * update required memory and length aligment for direct dma transactions. 447 * update required memory and length alignment for direct dma transactions.
413 * If the requested alignment is larger than the current alignment, then 448 * If the requested alignment is larger than the current alignment, then
414 * the current queue alignment is updated to the new value, otherwise it 449 * the current queue alignment is updated to the new value, otherwise it
415 * is left alone. The design of this is to allow multiple objects 450 * is left alone. The design of this is to allow multiple objects
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
new file mode 100644
index 000000000000..e660d26ca656
--- /dev/null
+++ b/block/blk-softirq.c
@@ -0,0 +1,175 @@
1/*
2 * Functions related to softirq rq completions
3 */
4#include <linux/kernel.h>
5#include <linux/module.h>
6#include <linux/init.h>
7#include <linux/bio.h>
8#include <linux/blkdev.h>
9#include <linux/interrupt.h>
10#include <linux/cpu.h>
11
12#include "blk.h"
13
14static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
15
16/*
17 * Softirq action handler - move entries to local list and loop over them
18 * while passing them to the queue registered handler.
19 */
20static void blk_done_softirq(struct softirq_action *h)
21{
22 struct list_head *cpu_list, local_list;
23
24 local_irq_disable();
25 cpu_list = &__get_cpu_var(blk_cpu_done);
26 list_replace_init(cpu_list, &local_list);
27 local_irq_enable();
28
29 while (!list_empty(&local_list)) {
30 struct request *rq;
31
32 rq = list_entry(local_list.next, struct request, csd.list);
33 list_del_init(&rq->csd.list);
34 rq->q->softirq_done_fn(rq);
35 }
36}
37
38#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
39static void trigger_softirq(void *data)
40{
41 struct request *rq = data;
42 unsigned long flags;
43 struct list_head *list;
44
45 local_irq_save(flags);
46 list = &__get_cpu_var(blk_cpu_done);
47 list_add_tail(&rq->csd.list, list);
48
49 if (list->next == &rq->csd.list)
50 raise_softirq_irqoff(BLOCK_SOFTIRQ);
51
52 local_irq_restore(flags);
53}
54
55/*
 56 * Set up and invoke a run of 'trigger_softirq' on the given CPU.
57 */
58static int raise_blk_irq(int cpu, struct request *rq)
59{
60 if (cpu_online(cpu)) {
61 struct call_single_data *data = &rq->csd;
62
63 data->func = trigger_softirq;
64 data->info = rq;
65 data->flags = 0;
66
67 __smp_call_function_single(cpu, data);
68 return 0;
69 }
70
71 return 1;
72}
73#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
74static int raise_blk_irq(int cpu, struct request *rq)
75{
76 return 1;
77}
78#endif
79
80static int __cpuinit blk_cpu_notify(struct notifier_block *self,
81 unsigned long action, void *hcpu)
82{
83 /*
84 * If a CPU goes away, splice its entries to the current CPU
85 * and trigger a run of the softirq
86 */
87 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
88 int cpu = (unsigned long) hcpu;
89
90 local_irq_disable();
91 list_splice_init(&per_cpu(blk_cpu_done, cpu),
92 &__get_cpu_var(blk_cpu_done));
93 raise_softirq_irqoff(BLOCK_SOFTIRQ);
94 local_irq_enable();
95 }
96
97 return NOTIFY_OK;
98}
99
100static struct notifier_block __cpuinitdata blk_cpu_notifier = {
101 .notifier_call = blk_cpu_notify,
102};
103
104void __blk_complete_request(struct request *req)
105{
106 struct request_queue *q = req->q;
107 unsigned long flags;
108 int ccpu, cpu, group_cpu;
109
110 BUG_ON(!q->softirq_done_fn);
111
112 local_irq_save(flags);
113 cpu = smp_processor_id();
114 group_cpu = blk_cpu_to_group(cpu);
115
116 /*
117 * Select completion CPU
118 */
119 if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1)
120 ccpu = req->cpu;
121 else
122 ccpu = cpu;
123
124 if (ccpu == cpu || ccpu == group_cpu) {
125 struct list_head *list;
126do_local:
127 list = &__get_cpu_var(blk_cpu_done);
128 list_add_tail(&req->csd.list, list);
129
130 /*
131 * if the list only contains our just added request,
132 * signal a raise of the softirq. If there are already
133 * entries there, someone already raised the irq but it
134 * hasn't run yet.
135 */
136 if (list->next == &req->csd.list)
137 raise_softirq_irqoff(BLOCK_SOFTIRQ);
138 } else if (raise_blk_irq(ccpu, req))
139 goto do_local;
140
141 local_irq_restore(flags);
142}
143
144/**
145 * blk_complete_request - end I/O on a request
146 * @req: the request being processed
147 *
148 * Description:
149 * Ends all I/O on a request. It does not handle partial completions,
150 * unless the driver actually implements this in its completion callback
151 * through requeueing. The actual completion happens out-of-order,
152 * through a softirq handler. The user must have registered a completion
153 * callback through blk_queue_softirq_done().
154 **/
155void blk_complete_request(struct request *req)
156{
157 if (unlikely(blk_should_fake_timeout(req->q)))
158 return;
159 if (!blk_mark_rq_complete(req))
160 __blk_complete_request(req);
161}
162EXPORT_SYMBOL(blk_complete_request);
163
164__init int blk_softirq_init(void)
165{
166 int i;
167
168 for_each_possible_cpu(i)
169 INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
170
171 open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
172 register_hotcpu_notifier(&blk_cpu_notifier);
173 return 0;
174}
175subsys_initcall(blk_softirq_init);
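From a driver's point of view the file above reduces to two calls: register a softirq completion handler at init, then call blk_complete_request() from the hard IRQ handler. A hedged sketch; fetch_completed_rq() is invented:

#include <linux/blkdev.h>
#include <linux/interrupt.h>

static void my_softirq_done(struct request *rq)
{
	/* runs in BLOCK_SOFTIRQ context, possibly on the submitting CPU */
	blk_end_request(rq, 0, rq->data_len);
}

static irqreturn_t my_irq(int irq, void *data)
{
	struct request *rq = fetch_completed_rq(data);	/* hypothetical */

	blk_complete_request(rq);	/* defers the real work to softirq */
	return IRQ_HANDLED;
}

/* at init time: blk_queue_softirq_done(q, my_softirq_done); */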
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 304ec73ab821..21e275d7eed9 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
156 return ret; 156 return ret;
157} 157}
158 158
159static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
160{
161 unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
162
163 return queue_var_show(set != 0, page);
164}
165
166static ssize_t
167queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
168{
169 ssize_t ret = -EINVAL;
170#if defined(CONFIG_USE_GENERIC_SMP_HELPERS)
171 unsigned long val;
172
173 ret = queue_var_store(&val, page, count);
174 spin_lock_irq(q->queue_lock);
175 if (val)
176 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
177 else
178 queue_flag_clear(QUEUE_FLAG_SAME_COMP, q);
179 spin_unlock_irq(q->queue_lock);
180#endif
181 return ret;
182}
159 183
160static struct queue_sysfs_entry queue_requests_entry = { 184static struct queue_sysfs_entry queue_requests_entry = {
161 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, 185 .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
@@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = {
197 .store = queue_nomerges_store, 221 .store = queue_nomerges_store,
198}; 222};
199 223
224static struct queue_sysfs_entry queue_rq_affinity_entry = {
225 .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
226 .show = queue_rq_affinity_show,
227 .store = queue_rq_affinity_store,
228};
229
200static struct attribute *default_attrs[] = { 230static struct attribute *default_attrs[] = {
201 &queue_requests_entry.attr, 231 &queue_requests_entry.attr,
202 &queue_ra_entry.attr, 232 &queue_ra_entry.attr,
@@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = {
205 &queue_iosched_entry.attr, 235 &queue_iosched_entry.attr,
206 &queue_hw_sector_size_entry.attr, 236 &queue_hw_sector_size_entry.attr,
207 &queue_nomerges_entry.attr, 237 &queue_nomerges_entry.attr,
238 &queue_rq_affinity_entry.attr,
208 NULL, 239 NULL,
209}; 240};
210 241
@@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk)
310 if (!q->request_fn) 341 if (!q->request_fn)
311 return 0; 342 return 0;
312 343
313 ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), 344 ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
314 "%s", "queue"); 345 "%s", "queue");
315 if (ret < 0) 346 if (ret < 0)
316 return ret; 347 return ret;
@@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk)
339 370
340 kobject_uevent(&q->kobj, KOBJ_REMOVE); 371 kobject_uevent(&q->kobj, KOBJ_REMOVE);
341 kobject_del(&q->kobj); 372 kobject_del(&q->kobj);
342 kobject_put(&disk->dev.kobj); 373 kobject_put(&disk_to_dev(disk)->kobj);
343 } 374 }
344} 375}
diff --git a/block/blk-tag.c b/block/blk-tag.c
index ed5166fbc599..c0d419e84ce7 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag);
29 * __blk_free_tags - release a given set of tag maintenance info 29 * __blk_free_tags - release a given set of tag maintenance info
30 * @bqt: the tag map to free 30 * @bqt: the tag map to free
31 * 31 *
32 * Tries to free the specified @bqt@. Returns true if it was 32 * Tries to free the specified @bqt. Returns true if it was
33 * actually freed and false if there are still references using it 33 * actually freed and false if there are still references using it
34 */ 34 */
35static int __blk_free_tags(struct blk_queue_tag *bqt) 35static int __blk_free_tags(struct blk_queue_tag *bqt)
@@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q)
78 * blk_free_tags - release a given set of tag maintenance info 78 * blk_free_tags - release a given set of tag maintenance info
79 * @bqt: the tag map to free 79 * @bqt: the tag map to free
80 * 80 *
81 * For externally managed @bqt@ frees the map. Callers of this 81 * For externally managed @bqt frees the map. Callers of this
82 * function must guarantee to have released all the queues that 82 * function must guarantee to have released all the queues that
83 * might have been using this tag map. 83 * might have been using this tag map.
84 */ 84 */
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags);
94 * @q: the request queue for the device 94 * @q: the request queue for the device
95 * 95 *
96 * Notes: 96 * Notes:
97 * This is used to disabled tagged queuing to a device, yet leave 97 * This is used to disable tagged queuing to a device, yet leave
98 * queue in function. 98 * queue in function.
99 **/ 99 **/
100void blk_queue_free_tags(struct request_queue *q) 100void blk_queue_free_tags(struct request_queue *q)
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags);
271 * @rq: the request that has completed 271 * @rq: the request that has completed
272 * 272 *
273 * Description: 273 * Description:
274 * Typically called when end_that_request_first() returns 0, meaning 274 * Typically called when end_that_request_first() returns %0, meaning
275 * all transfers have been done for a request. It's important to call 275 * all transfers have been done for a request. It's important to call
276 * this function before end_that_request_last(), as that will put the 276 * this function before end_that_request_last(), as that will put the
277 * request back on the free list thus corrupting the internal tag list. 277 * request back on the free list thus corrupting the internal tag list.
@@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag);
337int blk_queue_start_tag(struct request_queue *q, struct request *rq) 337int blk_queue_start_tag(struct request_queue *q, struct request *rq)
338{ 338{
339 struct blk_queue_tag *bqt = q->queue_tags; 339 struct blk_queue_tag *bqt = q->queue_tags;
340 unsigned max_depth, offset;
340 int tag; 341 int tag;
341 342
342 if (unlikely((rq->cmd_flags & REQ_QUEUED))) { 343 if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
@@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
350 /* 351 /*
351 * Protect against shared tag maps, as we may not have exclusive 352 * Protect against shared tag maps, as we may not have exclusive
352 * access to the tag map. 353 * access to the tag map.
354 *
355 * We reserve a few tags just for sync IO, since we don't want
 356 * to starve sync IO under a flood of async IO.
353 */ 357 */
358 max_depth = bqt->max_depth;
359 if (rq_is_sync(rq))
360 offset = 0;
361 else
362 offset = max_depth >> 2;
363
354 do { 364 do {
355 tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); 365 tag = find_next_zero_bit(bqt->tag_map, max_depth, offset);
356 if (tag >= bqt->max_depth) 366 if (tag >= max_depth)
357 return 1; 367 return 1;
358 368
359 } while (test_and_set_bit_lock(tag, bqt->tag_map)); 369 } while (test_and_set_bit_lock(tag, bqt->tag_map));
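Working the reservation through with concrete numbers, assuming a depth of 32:

/*
 * max_depth == 32, so an async request starts its search at offset
 * 32 >> 2 == 8 and may only claim tags 8..31, while a sync request
 * searches from 0 and may claim any of 0..31. A quarter of the map
 * is therefore held back for sync I/O during an async flood.
 */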
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
new file mode 100644
index 000000000000..972a63f848fb
--- /dev/null
+++ b/block/blk-timeout.c
@@ -0,0 +1,238 @@
1/*
2 * Functions related to generic timeout handling of requests.
3 */
4#include <linux/kernel.h>
5#include <linux/module.h>
6#include <linux/blkdev.h>
7#include <linux/fault-inject.h>
8
9#include "blk.h"
10
11#ifdef CONFIG_FAIL_IO_TIMEOUT
12
13static DECLARE_FAULT_ATTR(fail_io_timeout);
14
15static int __init setup_fail_io_timeout(char *str)
16{
17 return setup_fault_attr(&fail_io_timeout, str);
18}
19__setup("fail_io_timeout=", setup_fail_io_timeout);
20
21int blk_should_fake_timeout(struct request_queue *q)
22{
23 if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
24 return 0;
25
26 return should_fail(&fail_io_timeout, 1);
27}
28
29static int __init fail_io_timeout_debugfs(void)
30{
31 return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
32}
33
34late_initcall(fail_io_timeout_debugfs);
35
36ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
37 char *buf)
38{
39 struct gendisk *disk = dev_to_disk(dev);
40 int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
41
42 return sprintf(buf, "%d\n", set != 0);
43}
44
45ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
46 const char *buf, size_t count)
47{
48 struct gendisk *disk = dev_to_disk(dev);
49 int val;
50
51 if (count) {
52 struct request_queue *q = disk->queue;
53 char *p = (char *) buf;
54
55 val = simple_strtoul(p, &p, 10);
56 spin_lock_irq(q->queue_lock);
57 if (val)
58 queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
59 else
60 queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
61 spin_unlock_irq(q->queue_lock);
62 }
63
64 return count;
65}
66
67#endif /* CONFIG_FAIL_IO_TIMEOUT */
68
69/*
 70 * blk_delete_timer - Delete/cancel the timer for a given request.
71 * @req: request that we are canceling timer for
72 *
73 */
74void blk_delete_timer(struct request *req)
75{
76 struct request_queue *q = req->q;
77
78 /*
79 * Nothing to detach
80 */
81 if (!q->rq_timed_out_fn || !req->deadline)
82 return;
83
84 list_del_init(&req->timeout_list);
85
86 if (list_empty(&q->timeout_list))
87 del_timer(&q->timeout);
88}
89
90static void blk_rq_timed_out(struct request *req)
91{
92 struct request_queue *q = req->q;
93 enum blk_eh_timer_return ret;
94
95 ret = q->rq_timed_out_fn(req);
96 switch (ret) {
97 case BLK_EH_HANDLED:
98 __blk_complete_request(req);
99 break;
100 case BLK_EH_RESET_TIMER:
101 blk_clear_rq_complete(req);
102 blk_add_timer(req);
103 break;
104 case BLK_EH_NOT_HANDLED:
105 /*
106 * LLD handles this for now but in the future
107 * we can send a request msg to abort the command
108 * and we can move more of the generic scsi eh code to
109 * the blk layer.
110 */
111 break;
112 default:
113 printk(KERN_ERR "block: bad eh return: %d\n", ret);
114 break;
115 }
116}
117
118void blk_rq_timed_out_timer(unsigned long data)
119{
120 struct request_queue *q = (struct request_queue *) data;
121 unsigned long flags, uninitialized_var(next), next_set = 0;
122 struct request *rq, *tmp;
123
124 spin_lock_irqsave(q->queue_lock, flags);
125
126 list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) {
127 if (time_after_eq(jiffies, rq->deadline)) {
128 list_del_init(&rq->timeout_list);
129
130 /*
131 * Check if we raced with end io completion
132 */
133 if (blk_mark_rq_complete(rq))
134 continue;
135 blk_rq_timed_out(rq);
136 }
137 if (!next_set) {
138 next = rq->deadline;
139 next_set = 1;
140 } else if (time_after(next, rq->deadline))
141 next = rq->deadline;
142 }
143
144 if (next_set && !list_empty(&q->timeout_list))
145 mod_timer(&q->timeout, round_jiffies(next));
146
147 spin_unlock_irqrestore(q->queue_lock, flags);
148}
149
150/**
 151 * blk_abort_request -- Request recovery for the specified command
152 * @req: pointer to the request of interest
153 *
154 * This function requests that the block layer start recovery for the
155 * request by deleting the timer and calling the q's timeout function.
156 * LLDDs who implement their own error recovery MAY ignore the timeout
157 * event if they generated blk_abort_req. Must hold queue lock.
158 */
159void blk_abort_request(struct request *req)
160{
161 if (blk_mark_rq_complete(req))
162 return;
163 blk_delete_timer(req);
164 blk_rq_timed_out(req);
165}
166EXPORT_SYMBOL_GPL(blk_abort_request);
167
168/**
169 * blk_add_timer - Start timeout timer for a single request
170 * @req: request that is about to start running.
171 *
172 * Notes:
173 * Each request has its own timer, and as it is added to the queue, we
174 * set up the timer. When the request completes, we cancel the timer.
175 */
176void blk_add_timer(struct request *req)
177{
178 struct request_queue *q = req->q;
179 unsigned long expiry;
180
181 if (!q->rq_timed_out_fn)
182 return;
183
184 BUG_ON(!list_empty(&req->timeout_list));
185 BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags));
186
187 if (req->timeout)
188 req->deadline = jiffies + req->timeout;
189 else {
190 req->deadline = jiffies + q->rq_timeout;
191 /*
192 * Some LLDs, like scsi, peek at the timeout to prevent
193 * a command from being retried forever.
194 */
195 req->timeout = q->rq_timeout;
196 }
197 list_add_tail(&req->timeout_list, &q->timeout_list);
198
199 /*
200 * If the timer isn't already pending or this timeout is earlier
 201 * than an existing one, modify the timer. Round to the nearest
202 * second.
203 */
204 expiry = round_jiffies(req->deadline);
205
206 /*
207 * We use ->deadline == 0 to detect whether a timer was added or
208 * not, so just increase to next jiffy for that specific case
209 */
210 if (unlikely(!req->deadline))
211 req->deadline = 1;
212
213 if (!timer_pending(&q->timeout) ||
214 time_before(expiry, q->timeout.expires))
215 mod_timer(&q->timeout, expiry);
216}
217
218/**
 219 * blk_abort_queue -- Abort all requests on the given queue
220 * @queue: pointer to queue
221 *
222 */
223void blk_abort_queue(struct request_queue *q)
224{
225 unsigned long flags;
226 struct request *rq, *tmp;
227
228 spin_lock_irqsave(q->queue_lock, flags);
229
230 elv_abort_queue(q);
231
232 list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
233 blk_abort_request(rq);
234
235 spin_unlock_irqrestore(q->queue_lock, flags);
236
237}
238EXPORT_SYMBOL_GPL(blk_abort_queue);
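One intended consumer of blk_abort_queue() is surprise removal: when the device disappears, every in-flight request should hit the queue's rq_timed_out_fn now rather than at its deadline. A hedged sketch with an invented device structure:

static void my_device_gone(struct my_dev *dev)
{
	/* takes the queue lock itself; do not hold it here */
	blk_abort_queue(dev->queue);
}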
diff --git a/block/blk.h b/block/blk.h
index c79f30e1df52..e5c579769963 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q);
17 17
18void blk_unplug_work(struct work_struct *work); 18void blk_unplug_work(struct work_struct *work);
19void blk_unplug_timeout(unsigned long data); 19void blk_unplug_timeout(unsigned long data);
20void blk_rq_timed_out_timer(unsigned long data);
21void blk_delete_timer(struct request *);
22void blk_add_timer(struct request *);
23
24/*
25 * Internal atomic flags for request handling
26 */
27enum rq_atomic_flags {
28 REQ_ATOM_COMPLETE = 0,
29};
30
31/*
32 * EH timer and IO completion will both attempt to 'grab' the request, make
33 * sure that only one of them succeeds
34 */
35static inline int blk_mark_rq_complete(struct request *rq)
36{
37 return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
38}
39
40static inline void blk_clear_rq_complete(struct request *rq)
41{
42 clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
43}
44
45#ifdef CONFIG_FAIL_IO_TIMEOUT
46int blk_should_fake_timeout(struct request_queue *);
47ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
48ssize_t part_timeout_store(struct device *, struct device_attribute *,
49 const char *, size_t);
50#else
51static inline int blk_should_fake_timeout(struct request_queue *q)
52{
53 return 0;
54}
55#endif
20 56
21struct io_context *current_io_context(gfp_t gfp_flags, int node); 57struct io_context *current_io_context(gfp_t gfp_flags, int node);
22 58
@@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
59 95
60#endif /* BLK_DEV_INTEGRITY */ 96#endif /* BLK_DEV_INTEGRITY */
61 97
98static inline int blk_cpu_to_group(int cpu)
99{
100#ifdef CONFIG_SCHED_MC
101 cpumask_t mask = cpu_coregroup_map(cpu);
102 return first_cpu(mask);
103#elif defined(CONFIG_SCHED_SMT)
104 return first_cpu(per_cpu(cpu_sibling_map, cpu));
105#else
106 return cpu;
107#endif
108}
109
62#endif 110#endif
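To make the blk_mark_rq_complete() comment above concrete, here is a sketch of the race it guards against: normal completion and the EH timer both try to claim the request, and exactly one side wins (illustrative code, not part of the patch):

static void lld_complete_rq(struct request *rq)
{
	if (blk_mark_rq_complete(rq))
		return;		/* the timeout handler already owns rq */
	blk_delete_timer(rq);
	/* ... normal completion processing ... */
}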
diff --git a/block/blktrace.c b/block/blktrace.c
index eb9651ccb241..85049a7e7a17 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
111 */ 111 */
112static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; 112static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
113 113
114/* 114/* The ilog2() calls fall out because they're constant */
115 * Bio action bits of interest 115#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
116 */ 116 (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
117static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) };
118
119/*
120 * More could be added as needed, taking care to increment the decrementer
121 * to get correct indexing
122 */
123#define trace_barrier_bit(rw) \
124 (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0))
125#define trace_sync_bit(rw) \
126 (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1))
127#define trace_ahead_bit(rw) \
128 (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD))
129#define trace_meta_bit(rw) \
130 (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3))
131 117
132/* 118/*
133 * The worker for the various blk_add_trace*() types. Fills out a 119 * The worker for the various blk_add_trace*() types. Fills out a
@@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
147 return; 133 return;
148 134
149 what |= ddir_act[rw & WRITE]; 135 what |= ddir_act[rw & WRITE];
150 what |= bio_act[trace_barrier_bit(rw)]; 136 what |= MASK_TC_BIT(rw, BARRIER);
151 what |= bio_act[trace_sync_bit(rw)]; 137 what |= MASK_TC_BIT(rw, SYNC);
152 what |= bio_act[trace_ahead_bit(rw)]; 138 what |= MASK_TC_BIT(rw, AHEAD);
153 what |= bio_act[trace_meta_bit(rw)]; 139 what |= MASK_TC_BIT(rw, META);
140 what |= MASK_TC_BIT(rw, DISCARD);
154 141
155 pid = tsk->pid; 142 pid = tsk->pid;
156 if (unlikely(act_log_check(bt, what, sector, pid))) 143 if (unlikely(act_log_check(bt, what, sector, pid)))
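A worked check of the new macro's shift arithmetic, using values implied by the deleted trace_*_bit macros (assumed: BIO_RW_BARRIER == 2, BLK_TC_BARRIER == 1 << 2, BLK_TC_SHIFT == 16):

/*
 * MASK_TC_BIT(rw, BARRIER)
 *   == (rw & (1 << BIO_RW_BARRIER))
 *          << (ilog2(BLK_TC_BARRIER) + BLK_TC_SHIFT - BIO_RW_BARRIER)
 *   == (rw & (1 << 2)) << (2 + 16 - 2)
 *   == (rw & 0x4) << 16
 * which lands the bio barrier bit exactly on
 * BLK_TC_ACT(BLK_TC_BARRIER) == (1 << 2) << 16, with no table lookup.
 */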
@@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
382 if (!buts->buf_size || !buts->buf_nr) 369 if (!buts->buf_size || !buts->buf_nr)
383 return -EINVAL; 370 return -EINVAL;
384 371
385 strcpy(buts->name, name); 372 strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
373 buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
386 374
387 /* 375 /*
388 * some device names have larger paths - convert the slashes 376 * some device names have larger paths - convert the slashes
diff --git a/block/bsg.c b/block/bsg.c
index 0aae8d7ba99c..56cb343c76d8 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
283 next_rq->cmd_type = rq->cmd_type; 283 next_rq->cmd_type = rq->cmd_type;
284 284
285 dxferp = (void*)(unsigned long)hdr->din_xferp; 285 dxferp = (void*)(unsigned long)hdr->din_xferp;
286 ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); 286 ret = blk_rq_map_user(q, next_rq, NULL, dxferp,
287 hdr->din_xfer_len, GFP_KERNEL);
287 if (ret) 288 if (ret)
288 goto out; 289 goto out;
289 } 290 }
@@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm)
298 dxfer_len = 0; 299 dxfer_len = 0;
299 300
300 if (dxfer_len) { 301 if (dxfer_len) {
301 ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); 302 ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len,
303 GFP_KERNEL);
302 if (ret) 304 if (ret)
303 goto out; 305 goto out;
304 } 306 }
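The two bsg hunks only track a signature change; for reference, a sketch of a caller of the extended helper (the NULL is the new struct rq_map_data argument, unused here; the wrapper name is illustrative):

/* Map a user buffer into rq for a pass-through command. */
static int pt_map_user_buf(struct request_queue *q, struct request *rq,
			   void __user *ubuf, unsigned long len)
{
	/* no preallocated pages, so rq_map_data is NULL; GFP_KERNEL
	 * because this path may sleep */
	return blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
}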
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1e2aff812ee2..6a062eebbd15 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125;
39#define CFQ_MIN_TT (2) 39#define CFQ_MIN_TT (2)
40 40
41#define CFQ_SLICE_SCALE (5) 41#define CFQ_SLICE_SCALE (5)
42#define CFQ_HW_QUEUE_MIN (5)
42 43
43#define RQ_CIC(rq) \ 44#define RQ_CIC(rq) \
44 ((struct cfq_io_context *) (rq)->elevator_private) 45 ((struct cfq_io_context *) (rq)->elevator_private)
@@ -86,7 +87,14 @@ struct cfq_data {
86 87
87 int rq_in_driver; 88 int rq_in_driver;
88 int sync_flight; 89 int sync_flight;
90
91 /*
92 * queue-depth detection
93 */
94 int rq_queued;
89 int hw_tag; 95 int hw_tag;
96 int hw_tag_samples;
97 int rq_in_driver_peak;
90 98
91 /* 99 /*
92 * idle window management 100 * idle window management
@@ -244,7 +252,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
244{ 252{
245 if (cfqd->busy_queues) { 253 if (cfqd->busy_queues) {
246 cfq_log(cfqd, "schedule dispatch"); 254 cfq_log(cfqd, "schedule dispatch");
247 kblockd_schedule_work(&cfqd->unplug_work); 255 kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
248 } 256 }
249} 257}
250 258
@@ -654,15 +662,6 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
654 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 662 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
655 cfqd->rq_in_driver); 663 cfqd->rq_in_driver);
656 664
657 /*
658 * If the depth is larger 1, it really could be queueing. But lets
659 * make the mark a little higher - idling could still be good for
660 * low queueing, and a low queueing number could also just indicate
661 * a SCSI mid layer like behaviour where limit+1 is often seen.
662 */
663 if (!cfqd->hw_tag && cfqd->rq_in_driver > 4)
664 cfqd->hw_tag = 1;
665
666 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; 665 cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors;
667} 666}
668 667
@@ -686,6 +685,7 @@ static void cfq_remove_request(struct request *rq)
686 list_del_init(&rq->queuelist); 685 list_del_init(&rq->queuelist);
687 cfq_del_rq_rb(rq); 686 cfq_del_rq_rb(rq);
688 687
688 cfqq->cfqd->rq_queued--;
689 if (rq_is_meta(rq)) { 689 if (rq_is_meta(rq)) {
690 WARN_ON(!cfqq->meta_pending); 690 WARN_ON(!cfqq->meta_pending);
691 cfqq->meta_pending--; 691 cfqq->meta_pending--;
@@ -878,6 +878,14 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
878 struct cfq_io_context *cic; 878 struct cfq_io_context *cic;
879 unsigned long sl; 879 unsigned long sl;
880 880
881 /*
882 * SSD device without seek penalty, disable idling. But only do so
883 * for devices that support queuing, otherwise we still have a problem
884 * with sync vs async workloads.
885 */
886 if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag)
887 return;
888
881 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); 889 WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list));
882 WARN_ON(cfq_cfqq_slice_new(cfqq)); 890 WARN_ON(cfq_cfqq_slice_new(cfqq));
883 891
@@ -1833,6 +1841,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1833{ 1841{
1834 struct cfq_io_context *cic = RQ_CIC(rq); 1842 struct cfq_io_context *cic = RQ_CIC(rq);
1835 1843
1844 cfqd->rq_queued++;
1836 if (rq_is_meta(rq)) 1845 if (rq_is_meta(rq))
1837 cfqq->meta_pending++; 1846 cfqq->meta_pending++;
1838 1847
@@ -1880,6 +1889,31 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
1880 cfq_rq_enqueued(cfqd, cfqq, rq); 1889 cfq_rq_enqueued(cfqd, cfqq, rq);
1881} 1890}
1882 1891
1892/*
1893 * Update hw_tag based on peak queue depth over 50 samples under
1894 * sufficient load.
1895 */
1896static void cfq_update_hw_tag(struct cfq_data *cfqd)
1897{
1898 if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
1899 cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
1900
1901 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
1902 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
1903 return;
1904
1905 if (cfqd->hw_tag_samples++ < 50)
1906 return;
1907
1908 if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN)
1909 cfqd->hw_tag = 1;
1910 else
1911 cfqd->hw_tag = 0;
1912
1913 cfqd->hw_tag_samples = 0;
1914 cfqd->rq_in_driver_peak = 0;
1915}
1916
1883static void cfq_completed_request(struct request_queue *q, struct request *rq) 1917static void cfq_completed_request(struct request_queue *q, struct request *rq)
1884{ 1918{
1885 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1919 struct cfq_queue *cfqq = RQ_CFQQ(rq);
@@ -1890,6 +1924,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1890 now = jiffies; 1924 now = jiffies;
1891 cfq_log_cfqq(cfqd, cfqq, "complete"); 1925 cfq_log_cfqq(cfqd, cfqq, "complete");
1892 1926
1927 cfq_update_hw_tag(cfqd);
1928
1893 WARN_ON(!cfqd->rq_in_driver); 1929 WARN_ON(!cfqd->rq_in_driver);
1894 WARN_ON(!cfqq->dispatched); 1930 WARN_ON(!cfqq->dispatched);
1895 cfqd->rq_in_driver--; 1931 cfqd->rq_in_driver--;
@@ -2200,6 +2236,7 @@ static void *cfq_init_queue(struct request_queue *q)
2200 cfqd->cfq_slice[1] = cfq_slice_sync; 2236 cfqd->cfq_slice[1] = cfq_slice_sync;
2201 cfqd->cfq_slice_async_rq = cfq_slice_async_rq; 2237 cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
2202 cfqd->cfq_slice_idle = cfq_slice_idle; 2238 cfqd->cfq_slice_idle = cfq_slice_idle;
2239 cfqd->hw_tag = 1;
2203 2240
2204 return cfqd; 2241 return cfqd;
2205} 2242}
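The idling short-circuit added in cfq_arm_slice_timer() keys off blk_queue_nonrot(); a sketch of the driver side, assuming the QUEUE_FLAG_NONROT flag introduced elsewhere in this series:

/* A driver for a seek-free (SSD-like) device marks its queue
 * non-rotational at init time; combined with hw_tag this disables
 * CFQ idling entirely. */
static void ssd_init_queue(struct request_queue *q)
{
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
}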
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
index 79c14996ac11..e669aed4c6bc 100644
--- a/block/cmd-filter.c
+++ b/block/cmd-filter.c
@@ -211,14 +211,10 @@ int blk_register_filter(struct gendisk *disk)
211{ 211{
212 int ret; 212 int ret;
213 struct blk_cmd_filter *filter = &disk->queue->cmd_filter; 213 struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
214 struct kobject *parent = kobject_get(disk->holder_dir->parent);
215 214
216 if (!parent) 215 ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
217 return -ENODEV; 216 &disk_to_dev(disk)->kobj,
218
219 ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
220 "%s", "cmd_filter"); 217 "%s", "cmd_filter");
221
222 if (ret < 0) 218 if (ret < 0)
223 return ret; 219 return ret;
224 220
@@ -231,7 +227,6 @@ void blk_unregister_filter(struct gendisk *disk)
231 struct blk_cmd_filter *filter = &disk->queue->cmd_filter; 227 struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
232 228
233 kobject_put(&filter->kobj); 229 kobject_put(&filter->kobj);
234 kobject_put(disk->holder_dir->parent);
235} 230}
236EXPORT_SYMBOL(blk_unregister_filter); 231EXPORT_SYMBOL(blk_unregister_filter);
237#endif 232#endif
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index c23177e4623f..1e559fba7bdf 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
788 return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); 788 return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
789 case BLKFLSBUF: 789 case BLKFLSBUF:
790 case BLKROSET: 790 case BLKROSET:
791 case BLKDISCARD:
791 /* 792 /*
792 * the ones below are implemented in blkdev_locked_ioctl, 793 * the ones below are implemented in blkdev_locked_ioctl,
793 * but we call blkdev_ioctl, which gets the lock for us 794 * but we call blkdev_ioctl, which gets the lock for us
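With BLKDISCARD now routed through the compat path, 32-bit and 64-bit callers look the same; a user-space sketch (assumption: the ioctl takes a {start, length} pair of byte offsets, per the blk_ioctl_discard() helper this series adds, and the fd must be open for writing):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

/* Ask the device to discard len bytes starting at start. */
static int discard_bytes(int fd, uint64_t start, uint64_t len)
{
	uint64_t range[2] = { start, len };

	return ioctl(fd, BLKDISCARD, range);
}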
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 342448c3d2dd..fd311179f44c 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -33,7 +33,7 @@ struct deadline_data {
33 */ 33 */
34 struct rb_root sort_list[2]; 34 struct rb_root sort_list[2];
35 struct list_head fifo_list[2]; 35 struct list_head fifo_list[2];
36 36
37 /* 37 /*
38 * next in sort order. read, write or both are NULL 38 * next in sort order. read, write or both are NULL
39 */ 39 */
@@ -53,7 +53,11 @@ struct deadline_data {
53 53
54static void deadline_move_request(struct deadline_data *, struct request *); 54static void deadline_move_request(struct deadline_data *, struct request *);
55 55
56#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) 56static inline struct rb_root *
57deadline_rb_root(struct deadline_data *dd, struct request *rq)
58{
59 return &dd->sort_list[rq_data_dir(rq)];
60}
57 61
58/* 62/*
59 * get the request after `rq' in sector-sorted order 63 * get the request after `rq' in sector-sorted order
@@ -72,15 +76,11 @@ deadline_latter_request(struct request *rq)
72static void 76static void
73deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) 77deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
74{ 78{
75 struct rb_root *root = RQ_RB_ROOT(dd, rq); 79 struct rb_root *root = deadline_rb_root(dd, rq);
76 struct request *__alias; 80 struct request *__alias;
77 81
78retry: 82 while (unlikely(__alias = elv_rb_add(root, rq)))
79 __alias = elv_rb_add(root, rq);
80 if (unlikely(__alias)) {
81 deadline_move_request(dd, __alias); 83 deadline_move_request(dd, __alias);
82 goto retry;
83 }
84} 84}
85 85
86static inline void 86static inline void
@@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
91 if (dd->next_rq[data_dir] == rq) 91 if (dd->next_rq[data_dir] == rq)
92 dd->next_rq[data_dir] = deadline_latter_request(rq); 92 dd->next_rq[data_dir] = deadline_latter_request(rq);
93 93
94 elv_rb_del(RQ_RB_ROOT(dd, rq), rq); 94 elv_rb_del(deadline_rb_root(dd, rq), rq);
95} 95}
96 96
97/* 97/*
@@ -106,7 +106,7 @@ deadline_add_request(struct request_queue *q, struct request *rq)
106 deadline_add_rq_rb(dd, rq); 106 deadline_add_rq_rb(dd, rq);
107 107
108 /* 108 /*
109 * set expire time (only used for reads) and add to fifo list 109 * set expire time and add to fifo list
110 */ 110 */
111 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); 111 rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
112 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); 112 list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
@@ -162,7 +162,7 @@ static void deadline_merged_request(struct request_queue *q,
162 * if the merge was a front merge, we need to reposition request 162 * if the merge was a front merge, we need to reposition request
163 */ 163 */
164 if (type == ELEVATOR_FRONT_MERGE) { 164 if (type == ELEVATOR_FRONT_MERGE) {
165 elv_rb_del(RQ_RB_ROOT(dd, req), req); 165 elv_rb_del(deadline_rb_root(dd, req), req);
166 deadline_add_rq_rb(dd, req); 166 deadline_add_rq_rb(dd, req);
167 } 167 }
168} 168}
@@ -212,7 +212,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
212 dd->next_rq[WRITE] = NULL; 212 dd->next_rq[WRITE] = NULL;
213 dd->next_rq[data_dir] = deadline_latter_request(rq); 213 dd->next_rq[data_dir] = deadline_latter_request(rq);
214 214
215 dd->last_sector = rq->sector + rq->nr_sectors; 215 dd->last_sector = rq_end_sector(rq);
216 216
217 /* 217 /*
218 * take it off the sort and fifo list, move 218 * take it off the sort and fifo list, move
@@ -222,7 +222,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
222} 222}
223 223
224/* 224/*
225 * deadline_check_fifo returns 0 if there are no expired reads on the fifo, 225 * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
226 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) 226 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
227 */ 227 */
228static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) 228static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
@@ -258,17 +258,9 @@ static int deadline_dispatch_requests(struct request_queue *q, int force)
258 else 258 else
259 rq = dd->next_rq[READ]; 259 rq = dd->next_rq[READ];
260 260
261 if (rq) { 261 if (rq && dd->batching < dd->fifo_batch)
262 /* we have a "next request" */ 262 /* we have a next request are still entitled to batch */
263 263 goto dispatch_request;
264 if (dd->last_sector != rq->sector)
265 /* end the batch on a non sequential request */
266 dd->batching += dd->fifo_batch;
267
268 if (dd->batching < dd->fifo_batch)
269 /* we are still entitled to batch */
270 goto dispatch_request;
271 }
272 264
273 /* 265 /*
274 * at this point we are not running a batch. select the appropriate 266 * at this point we are not running a batch. select the appropriate
diff --git a/block/elevator.c b/block/elevator.c
index ed6f8f32d27e..04518921db31 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -34,8 +34,9 @@
34#include <linux/delay.h> 34#include <linux/delay.h>
35#include <linux/blktrace_api.h> 35#include <linux/blktrace_api.h>
36#include <linux/hash.h> 36#include <linux/hash.h>
37#include <linux/uaccess.h>
37 38
38#include <asm/uaccess.h> 39#include "blk.h"
39 40
40static DEFINE_SPINLOCK(elv_list_lock); 41static DEFINE_SPINLOCK(elv_list_lock);
41static LIST_HEAD(elv_list); 42static LIST_HEAD(elv_list);
@@ -75,6 +76,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
75 return 0; 76 return 0;
76 77
77 /* 78 /*
79 * Don't merge file system requests and discard requests
80 */
81 if (bio_discard(bio) != bio_discard(rq->bio))
82 return 0;
83
84 /*
78 * different data direction or already started, don't merge 85 * different data direction or already started, don't merge
79 */ 86 */
80 if (bio_data_dir(bio) != rq_data_dir(rq)) 87 if (bio_data_dir(bio) != rq_data_dir(rq))
@@ -438,6 +445,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
438 list_for_each_prev(entry, &q->queue_head) { 445 list_for_each_prev(entry, &q->queue_head) {
439 struct request *pos = list_entry_rq(entry); 446 struct request *pos = list_entry_rq(entry);
440 447
448 if (blk_discard_rq(rq) != blk_discard_rq(pos))
449 break;
441 if (rq_data_dir(rq) != rq_data_dir(pos)) 450 if (rq_data_dir(rq) != rq_data_dir(pos))
442 break; 451 break;
443 if (pos->cmd_flags & stop_flags) 452 if (pos->cmd_flags & stop_flags)
@@ -607,7 +616,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
607 break; 616 break;
608 617
609 case ELEVATOR_INSERT_SORT: 618 case ELEVATOR_INSERT_SORT:
610 BUG_ON(!blk_fs_request(rq)); 619 BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
611 rq->cmd_flags |= REQ_SORTED; 620 rq->cmd_flags |= REQ_SORTED;
612 q->nr_sorted++; 621 q->nr_sorted++;
613 if (rq_mergeable(rq)) { 622 if (rq_mergeable(rq)) {
@@ -692,7 +701,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where,
692 * this request is scheduling boundary, update 701 * this request is scheduling boundary, update
693 * end_sector 702 * end_sector
694 */ 703 */
695 if (blk_fs_request(rq)) { 704 if (blk_fs_request(rq) || blk_discard_rq(rq)) {
696 q->end_sector = rq_end_sector(rq); 705 q->end_sector = rq_end_sector(rq);
697 q->boundary_rq = rq; 706 q->boundary_rq = rq;
698 } 707 }
@@ -745,7 +754,7 @@ struct request *elv_next_request(struct request_queue *q)
745 * not ever see it. 754 * not ever see it.
746 */ 755 */
747 if (blk_empty_barrier(rq)) { 756 if (blk_empty_barrier(rq)) {
748 end_queued_request(rq, 1); 757 __blk_end_request(rq, 0, blk_rq_bytes(rq));
749 continue; 758 continue;
750 } 759 }
751 if (!(rq->cmd_flags & REQ_STARTED)) { 760 if (!(rq->cmd_flags & REQ_STARTED)) {
@@ -764,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q)
764 */ 773 */
765 rq->cmd_flags |= REQ_STARTED; 774 rq->cmd_flags |= REQ_STARTED;
766 blk_add_trace_rq(q, rq, BLK_TA_ISSUE); 775 blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
776
777 /*
778 * We are now handing the request to the hardware,
779 * add the timeout handler
780 */
781 blk_add_timer(rq);
767 } 782 }
768 783
769 if (!q->boundary_rq || q->boundary_rq == rq) { 784 if (!q->boundary_rq || q->boundary_rq == rq) {
@@ -782,7 +797,6 @@ struct request *elv_next_request(struct request_queue *q)
782 * device can handle 797 * device can handle
783 */ 798 */
784 rq->nr_phys_segments++; 799 rq->nr_phys_segments++;
785 rq->nr_hw_segments++;
786 } 800 }
787 801
788 if (!q->prep_rq_fn) 802 if (!q->prep_rq_fn)
@@ -805,14 +819,13 @@ struct request *elv_next_request(struct request_queue *q)
805 * so that we don't add it again 819 * so that we don't add it again
806 */ 820 */
807 --rq->nr_phys_segments; 821 --rq->nr_phys_segments;
808 --rq->nr_hw_segments;
809 } 822 }
810 823
811 rq = NULL; 824 rq = NULL;
812 break; 825 break;
813 } else if (ret == BLKPREP_KILL) { 826 } else if (ret == BLKPREP_KILL) {
814 rq->cmd_flags |= REQ_QUIET; 827 rq->cmd_flags |= REQ_QUIET;
815 end_queued_request(rq, 0); 828 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
816 } else { 829 } else {
817 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); 830 printk(KERN_ERR "%s: bad return=%d\n", __func__, ret);
818 break; 831 break;
@@ -901,6 +914,19 @@ int elv_may_queue(struct request_queue *q, int rw)
901 return ELV_MQUEUE_MAY; 914 return ELV_MQUEUE_MAY;
902} 915}
903 916
917void elv_abort_queue(struct request_queue *q)
918{
919 struct request *rq;
920
921 while (!list_empty(&q->queue_head)) {
922 rq = list_entry_rq(q->queue_head.next);
923 rq->cmd_flags |= REQ_QUIET;
924 blk_add_trace_rq(q, rq, BLK_TA_ABORT);
925 __blk_end_request(rq, -EIO, blk_rq_bytes(rq));
926 }
927}
928EXPORT_SYMBOL(elv_abort_queue);
929
904void elv_completed_request(struct request_queue *q, struct request *rq) 930void elv_completed_request(struct request_queue *q, struct request *rq)
905{ 931{
906 elevator_t *e = q->elevator; 932 elevator_t *e = q->elevator;
diff --git a/block/genhd.c b/block/genhd.c
index e0ce23ac2ece..4cd3433c99ac 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -16,6 +16,7 @@
16#include <linux/kobj_map.h> 16#include <linux/kobj_map.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/idr.h>
19 20
20#include "blk.h" 21#include "blk.h"
21 22
@@ -24,8 +25,194 @@ static DEFINE_MUTEX(block_class_lock);
24struct kobject *block_depr; 25struct kobject *block_depr;
25#endif 26#endif
26 27
28/* for extended dynamic devt allocation, currently only one major is used */
29#define MAX_EXT_DEVT (1 << MINORBITS)
30
31/* For extended devt allocation. ext_devt_mutex prevents lookup
32 * results from going away underneath its user.
33 */
34static DEFINE_MUTEX(ext_devt_mutex);
35static DEFINE_IDR(ext_devt_idr);
36
27static struct device_type disk_type; 37static struct device_type disk_type;
28 38
39/**
40 * disk_get_part - get partition
41 * @disk: disk to look the partition up on
42 * @partno: partition number
43 *
44 * Look for partition @partno from @disk. If found, increment
45 * reference count and return it.
46 *
47 * CONTEXT:
48 * Don't care.
49 *
50 * RETURNS:
51 * Pointer to the found partition on success, NULL if not found.
52 */
53struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
54{
55 struct hd_struct *part = NULL;
56 struct disk_part_tbl *ptbl;
57
58 if (unlikely(partno < 0))
59 return NULL;
60
61 rcu_read_lock();
62
63 ptbl = rcu_dereference(disk->part_tbl);
64 if (likely(partno < ptbl->len)) {
65 part = rcu_dereference(ptbl->part[partno]);
66 if (part)
67 get_device(part_to_dev(part));
68 }
69
70 rcu_read_unlock();
71
72 return part;
73}
74EXPORT_SYMBOL_GPL(disk_get_part);
75
76/**
77 * disk_part_iter_init - initialize partition iterator
78 * @piter: iterator to initialize
79 * @disk: disk to iterate over
80 * @flags: DISK_PITER_* flags
81 *
82 * Initialize @piter so that it iterates over partitions of @disk.
83 *
84 * CONTEXT:
85 * Don't care.
86 */
87void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
88 unsigned int flags)
89{
90 struct disk_part_tbl *ptbl;
91
92 rcu_read_lock();
93 ptbl = rcu_dereference(disk->part_tbl);
94
95 piter->disk = disk;
96 piter->part = NULL;
97
98 if (flags & DISK_PITER_REVERSE)
99 piter->idx = ptbl->len - 1;
100 else if (flags & DISK_PITER_INCL_PART0)
101 piter->idx = 0;
102 else
103 piter->idx = 1;
104
105 piter->flags = flags;
106
107 rcu_read_unlock();
108}
109EXPORT_SYMBOL_GPL(disk_part_iter_init);
110
111/**
112 * disk_part_iter_next - proceed iterator to the next partition and return it
113 * @piter: iterator of interest
114 *
115 * Proceed @piter to the next partition and return it.
116 *
117 * CONTEXT:
118 * Don't care.
119 */
120struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
121{
122 struct disk_part_tbl *ptbl;
123 int inc, end;
124
125 /* put the last partition */
126 disk_put_part(piter->part);
127 piter->part = NULL;
128
129 /* get part_tbl */
130 rcu_read_lock();
131 ptbl = rcu_dereference(piter->disk->part_tbl);
132
133 /* determine iteration parameters */
134 if (piter->flags & DISK_PITER_REVERSE) {
135 inc = -1;
136 if (piter->flags & DISK_PITER_INCL_PART0)
137 end = -1;
138 else
139 end = 0;
140 } else {
141 inc = 1;
142 end = ptbl->len;
143 }
144
145 /* iterate to the next partition */
146 for (; piter->idx != end; piter->idx += inc) {
147 struct hd_struct *part;
148
149 part = rcu_dereference(ptbl->part[piter->idx]);
150 if (!part)
151 continue;
152 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
153 continue;
154
155 get_device(part_to_dev(part));
156 piter->part = part;
157 piter->idx += inc;
158 break;
159 }
160
161 rcu_read_unlock();
162
163 return piter->part;
164}
165EXPORT_SYMBOL_GPL(disk_part_iter_next);
166
167/**
168 * disk_part_iter_exit - finish up partition iteration
169 * @piter: iter of interest
170 *
171 * Called when iteration is over. Cleans up @piter.
172 *
173 * CONTEXT:
174 * Don't care.
175 */
176void disk_part_iter_exit(struct disk_part_iter *piter)
177{
178 disk_put_part(piter->part);
179 piter->part = NULL;
180}
181EXPORT_SYMBOL_GPL(disk_part_iter_exit);
182
183/**
184 * disk_map_sector_rcu - map sector to partition
185 * @disk: gendisk of interest
186 * @sector: sector to map
187 *
188 * Find out which partition @sector maps to on @disk. This is
189 * primarily used for stats accounting.
190 *
191 * CONTEXT:
192 * RCU read locked. The returned partition pointer is valid only
193 * while preemption is disabled.
194 *
195 * RETURNS:
196 * Found partition on success, part0 is returned if no partition matches
197 */
198struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
199{
200 struct disk_part_tbl *ptbl;
201 int i;
202
203 ptbl = rcu_dereference(disk->part_tbl);
204
205 for (i = 1; i < ptbl->len; i++) {
206 struct hd_struct *part = rcu_dereference(ptbl->part[i]);
207
208 if (part && part->start_sect <= sector &&
209 sector < part->start_sect + part->nr_sects)
210 return part;
211 }
212 return &disk->part0;
213}
214EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
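A sketch of the intended iterator usage, mirroring how printk_all_partitions() below walks the table (the function name is illustrative):

static void example_dump_parts(struct gendisk *disk)
{
	struct disk_part_iter piter;
	struct hd_struct *part;

	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
	while ((part = disk_part_iter_next(&piter)))
		printk(KERN_DEBUG "part %d: %llu sectors\n", part->partno,
		       (unsigned long long)part->nr_sects);
	disk_part_iter_exit(&piter);
}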
215
29/* 216/*
30 * Can be deleted altogether. Later. 217 * Can be deleted altogether. Later.
31 * 218 *
@@ -43,14 +230,14 @@ static inline int major_to_index(int major)
43} 230}
44 231
45#ifdef CONFIG_PROC_FS 232#ifdef CONFIG_PROC_FS
46void blkdev_show(struct seq_file *f, off_t offset) 233void blkdev_show(struct seq_file *seqf, off_t offset)
47{ 234{
48 struct blk_major_name *dp; 235 struct blk_major_name *dp;
49 236
50 if (offset < BLKDEV_MAJOR_HASH_SIZE) { 237 if (offset < BLKDEV_MAJOR_HASH_SIZE) {
51 mutex_lock(&block_class_lock); 238 mutex_lock(&block_class_lock);
52 for (dp = major_names[offset]; dp; dp = dp->next) 239 for (dp = major_names[offset]; dp; dp = dp->next)
53 seq_printf(f, "%3d %s\n", dp->major, dp->name); 240 seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
54 mutex_unlock(&block_class_lock); 241 mutex_unlock(&block_class_lock);
55 } 242 }
56} 243}
@@ -136,6 +323,118 @@ EXPORT_SYMBOL(unregister_blkdev);
136 323
137static struct kobj_map *bdev_map; 324static struct kobj_map *bdev_map;
138 325
326/**
327 * blk_mangle_minor - scatter minor numbers apart
328 * @minor: minor number to mangle
329 *
330 * Scatter consecutively allocated @minor numbers apart if CONFIG_DEBUG_BLOCK_EXT_DEVT
331 * is enabled. Mangling twice gives the original value.
332 *
333 * RETURNS:
334 * Mangled value.
335 *
336 * CONTEXT:
337 * Don't care.
338 */
339static int blk_mangle_minor(int minor)
340{
341#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
342 int i;
343
344 for (i = 0; i < MINORBITS / 2; i++) {
345 int low = minor & (1 << i);
346 int high = minor & (1 << (MINORBITS - 1 - i));
347 int distance = MINORBITS - 1 - 2 * i;
348
349 minor ^= low | high; /* clear both bits */
350 low <<= distance; /* swap the positions */
351 high >>= distance;
352 minor |= low | high; /* and set */
353 }
354#endif
355 return minor;
356}
357
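Worth noting: each loop iteration swaps bit i with bit (MINORBITS - 1 - i), so the mangle is its own inverse, as the comment promises. An illustrative check (not in the patch):

static void __init check_mangle(void)
{
	/* two passes swap the same bit pairs back */
	WARN_ON(blk_mangle_minor(blk_mangle_minor(42)) != 42);
}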
358/**
359 * blk_alloc_devt - allocate a dev_t for a partition
360 * @part: partition to allocate dev_t for
362 * @devt: out parameter for resulting dev_t
363 *
364 * Allocate a dev_t for block device.
365 *
366 * RETURNS:
367 * 0 on success, allocated dev_t is returned in *@devt. -errno on
368 * failure.
369 *
370 * CONTEXT:
371 * Might sleep.
372 */
373int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
374{
375 struct gendisk *disk = part_to_disk(part);
376 int idx, rc;
377
378 /* in consecutive minor range? */
379 if (part->partno < disk->minors) {
380 *devt = MKDEV(disk->major, disk->first_minor + part->partno);
381 return 0;
382 }
383
384 /* allocate ext devt */
385 do {
386 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
387 return -ENOMEM;
388 rc = idr_get_new(&ext_devt_idr, part, &idx);
389 } while (rc == -EAGAIN);
390
391 if (rc)
392 return rc;
393
394 if (idx > MAX_EXT_DEVT) {
395 idr_remove(&ext_devt_idr, idx);
396 return -EBUSY;
397 }
398
399 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
400 return 0;
401}
402
403/**
404 * blk_free_devt - free a dev_t
405 * @devt: dev_t to free
406 *
407 * Free @devt which was allocated using blk_alloc_devt().
408 *
409 * CONTEXT:
410 * Might sleep.
411 */
412void blk_free_devt(dev_t devt)
413{
414 might_sleep();
415
416 if (devt == MKDEV(0, 0))
417 return;
418
419 if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
420 mutex_lock(&ext_devt_mutex);
421 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
422 mutex_unlock(&ext_devt_mutex);
423 }
424}
425
426static char *bdevt_str(dev_t devt, char *buf)
427{
428 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
429 char tbuf[BDEVT_SIZE];
430 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
431 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
432 } else
433 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
434
435 return buf;
436}
437
139/* 438/*
140 * Register device numbers dev..(dev+range-1) 439 * Register device numbers dev..(dev+range-1)
141 * range must be nonzero 440 * range must be nonzero
@@ -157,11 +456,11 @@ void blk_unregister_region(dev_t devt, unsigned long range)
157 456
158EXPORT_SYMBOL(blk_unregister_region); 457EXPORT_SYMBOL(blk_unregister_region);
159 458
160static struct kobject *exact_match(dev_t devt, int *part, void *data) 459static struct kobject *exact_match(dev_t devt, int *partno, void *data)
161{ 460{
162 struct gendisk *p = data; 461 struct gendisk *p = data;
163 462
164 return &p->dev.kobj; 463 return &disk_to_dev(p)->kobj;
165} 464}
166 465
167static int exact_lock(dev_t devt, void *data) 466static int exact_lock(dev_t devt, void *data)
@@ -179,21 +478,46 @@ static int exact_lock(dev_t devt, void *data)
179 * 478 *
180 * This function registers the partitioning information in @disk 479 * This function registers the partitioning information in @disk
181 * with the kernel. 480 * with the kernel.
481 *
482 * FIXME: error handling
182 */ 483 */
183void add_disk(struct gendisk *disk) 484void add_disk(struct gendisk *disk)
184{ 485{
185 struct backing_dev_info *bdi; 486 struct backing_dev_info *bdi;
487 dev_t devt;
186 int retval; 488 int retval;
187 489
490 /* minors == 0 indicates to use ext devt from part0 and should
491 * be accompanied by the GENHD_FL_EXT_DEVT flag. Make sure all
492 * parameters make sense.
493 */
494 WARN_ON(disk->minors && !(disk->major || disk->first_minor));
495 WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
496
188 disk->flags |= GENHD_FL_UP; 497 disk->flags |= GENHD_FL_UP;
189 blk_register_region(MKDEV(disk->major, disk->first_minor), 498
190 disk->minors, NULL, exact_match, exact_lock, disk); 499 retval = blk_alloc_devt(&disk->part0, &devt);
500 if (retval) {
501 WARN_ON(1);
502 return;
503 }
504 disk_to_dev(disk)->devt = devt;
505
506 /* ->major and ->first_minor aren't supposed to be
507 * dereferenced from here on, but set them just in case.
508 */
509 disk->major = MAJOR(devt);
510 disk->first_minor = MINOR(devt);
511
512 blk_register_region(disk_devt(disk), disk->minors, NULL,
513 exact_match, exact_lock, disk);
191 register_disk(disk); 514 register_disk(disk);
192 blk_register_queue(disk); 515 blk_register_queue(disk);
193 516
194 bdi = &disk->queue->backing_dev_info; 517 bdi = &disk->queue->backing_dev_info;
195 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); 518 bdi_register_dev(bdi, disk_devt(disk));
196 retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); 519 retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
520 "bdi");
197 WARN_ON(retval); 521 WARN_ON(retval);
198} 522}
199 523
@@ -202,78 +526,71 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
202 526
203void unlink_gendisk(struct gendisk *disk) 527void unlink_gendisk(struct gendisk *disk)
204{ 528{
205 sysfs_remove_link(&disk->dev.kobj, "bdi"); 529 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
206 bdi_unregister(&disk->queue->backing_dev_info); 530 bdi_unregister(&disk->queue->backing_dev_info);
207 blk_unregister_queue(disk); 531 blk_unregister_queue(disk);
208 blk_unregister_region(MKDEV(disk->major, disk->first_minor), 532 blk_unregister_region(disk_devt(disk), disk->minors);
209 disk->minors);
210} 533}
211 534
212/** 535/**
213 * get_gendisk - get partitioning information for a given device 536 * get_gendisk - get partitioning information for a given device
214 * @dev: device to get partitioning information for 537 * @devt: device to get partitioning information for
538 * @part: returned partition index
215 * 539 *
216 * This function gets the structure containing partitioning 540 * This function gets the structure containing partitioning
217 * information for the given device @dev. 541 * information for the given device @devt.
218 */ 542 */
219struct gendisk *get_gendisk(dev_t devt, int *part) 543struct gendisk *get_gendisk(dev_t devt, int *partno)
220{ 544{
221 struct kobject *kobj = kobj_lookup(bdev_map, devt, part); 545 struct gendisk *disk = NULL;
222 struct device *dev = kobj_to_dev(kobj); 546
547 if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
548 struct kobject *kobj;
549
550 kobj = kobj_lookup(bdev_map, devt, partno);
551 if (kobj)
552 disk = dev_to_disk(kobj_to_dev(kobj));
553 } else {
554 struct hd_struct *part;
223 555
224 return kobj ? dev_to_disk(dev) : NULL; 556 mutex_lock(&ext_devt_mutex);
557 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
558 if (part && get_disk(part_to_disk(part))) {
559 *partno = part->partno;
560 disk = part_to_disk(part);
561 }
562 mutex_unlock(&ext_devt_mutex);
563 }
564
565 return disk;
225} 566}
226 567
227/* 568/**
228 * print a partitions - intended for places where the root filesystem can't be 569 * bdget_disk - do bdget() by gendisk and partition number
229 * mounted and thus to give the victim some idea of what went wrong 570 * @disk: gendisk of interest
571 * @partno: partition number
572 *
573 * Find partition @partno from @disk, do bdget() on it.
574 *
575 * CONTEXT:
576 * Don't care.
577 *
578 * RETURNS:
579 * Resulting block_device on success, NULL on failure.
230 */ 580 */
231static int printk_partition(struct device *dev, void *data) 581struct block_device *bdget_disk(struct gendisk *disk, int partno)
232{ 582{
233 struct gendisk *sgp; 583 struct hd_struct *part;
234 char buf[BDEVNAME_SIZE]; 584 struct block_device *bdev = NULL;
235 int n;
236
237 if (dev->type != &disk_type)
238 goto exit;
239 585
240 sgp = dev_to_disk(dev); 586 part = disk_get_part(disk, partno);
241 /* 587 if (part)
242 * Don't show empty devices or things that have been surpressed 588 bdev = bdget(part_devt(part));
243 */ 589 disk_put_part(part);
244 if (get_capacity(sgp) == 0 ||
245 (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
246 goto exit;
247 590
248 /* 591 return bdev;
249 * Note, unlike /proc/partitions, I am showing the numbers in
250 * hex - the same format as the root= option takes.
251 */
252 printk("%02x%02x %10llu %s",
253 sgp->major, sgp->first_minor,
254 (unsigned long long)get_capacity(sgp) >> 1,
255 disk_name(sgp, 0, buf));
256 if (sgp->driverfs_dev != NULL &&
257 sgp->driverfs_dev->driver != NULL)
258 printk(" driver: %s\n",
259 sgp->driverfs_dev->driver->name);
260 else
261 printk(" (driver?)\n");
262
263 /* now show the partitions */
264 for (n = 0; n < sgp->minors - 1; ++n) {
265 if (sgp->part[n] == NULL)
266 goto exit;
267 if (sgp->part[n]->nr_sects == 0)
268 goto exit;
269 printk(" %02x%02x %10llu %s\n",
270 sgp->major, n + 1 + sgp->first_minor,
271 (unsigned long long)sgp->part[n]->nr_sects >> 1,
272 disk_name(sgp, n + 1, buf));
273 }
274exit:
275 return 0;
276} 592}
593EXPORT_SYMBOL(bdget_disk);
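A sketch of the lookup/release pairing a bdget_disk() caller is expected to follow (illustrative wrapper):

static void example_use_bdev(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);

	if (!bdev)
		return;
	/* ... use bdev ... */
	bdput(bdev);
}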
277 594
278/* 595/*
279 * print a full list of all partitions - intended for places where the root 596 * print a full list of all partitions - intended for places where the root
@@ -282,120 +599,145 @@ exit:
282 */ 599 */
283void __init printk_all_partitions(void) 600void __init printk_all_partitions(void)
284{ 601{
285 mutex_lock(&block_class_lock); 602 struct class_dev_iter iter;
286 class_for_each_device(&block_class, NULL, NULL, printk_partition); 603 struct device *dev;
287 mutex_unlock(&block_class_lock); 604
605 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
606 while ((dev = class_dev_iter_next(&iter))) {
607 struct gendisk *disk = dev_to_disk(dev);
608 struct disk_part_iter piter;
609 struct hd_struct *part;
610 char name_buf[BDEVNAME_SIZE];
611 char devt_buf[BDEVT_SIZE];
612
613 /*
614 * Don't show empty devices or things that have been
615 * suppressed
616 */
617 if (get_capacity(disk) == 0 ||
618 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
619 continue;
620
621 /*
622 * Note, unlike /proc/partitions, I am showing the
623 * numbers in hex - the same format as the root=
624 * option takes.
625 */
626 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
627 while ((part = disk_part_iter_next(&piter))) {
628 bool is_part0 = part == &disk->part0;
629
630 printk("%s%s %10llu %s", is_part0 ? "" : " ",
631 bdevt_str(part_devt(part), devt_buf),
632 (unsigned long long)part->nr_sects >> 1,
633 disk_name(disk, part->partno, name_buf));
634 if (is_part0) {
635 if (disk->driverfs_dev != NULL &&
636 disk->driverfs_dev->driver != NULL)
637 printk(" driver: %s\n",
638 disk->driverfs_dev->driver->name);
639 else
640 printk(" (driver?)\n");
641 } else
642 printk("\n");
643 }
644 disk_part_iter_exit(&piter);
645 }
646 class_dev_iter_exit(&iter);
288} 647}
289 648
290#ifdef CONFIG_PROC_FS 649#ifdef CONFIG_PROC_FS
291/* iterator */ 650/* iterator */
292static int find_start(struct device *dev, void *data) 651static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
293{ 652{
294 loff_t *k = data; 653 loff_t skip = *pos;
654 struct class_dev_iter *iter;
655 struct device *dev;
295 656
296 if (dev->type != &disk_type) 657 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
297 return 0; 658 if (!iter)
298 if (!*k) 659 return ERR_PTR(-ENOMEM);
299 return 1; 660
300 (*k)--; 661 seqf->private = iter;
301 return 0; 662 class_dev_iter_init(iter, &block_class, NULL, &disk_type);
663 do {
664 dev = class_dev_iter_next(iter);
665 if (!dev)
666 return NULL;
667 } while (skip--);
668
669 return dev_to_disk(dev);
302} 670}
303 671
304static void *part_start(struct seq_file *part, loff_t *pos) 672static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
305{ 673{
306 struct device *dev; 674 struct device *dev;
307 loff_t k = *pos;
308
309 if (!k)
310 part->private = (void *)1LU; /* tell show to print header */
311 675
312 mutex_lock(&block_class_lock); 676 (*pos)++;
313 dev = class_find_device(&block_class, NULL, &k, find_start); 677 dev = class_dev_iter_next(seqf->private);
314 if (dev) { 678 if (dev)
315 put_device(dev);
316 return dev_to_disk(dev); 679 return dev_to_disk(dev);
317 } 680
318 return NULL; 681 return NULL;
319} 682}
320 683
321static int find_next(struct device *dev, void *data) 684static void disk_seqf_stop(struct seq_file *seqf, void *v)
322{ 685{
323 if (dev->type == &disk_type) 686 struct class_dev_iter *iter = seqf->private;
324 return 1;
325 return 0;
326}
327 687
328static void *part_next(struct seq_file *part, void *v, loff_t *pos) 688 /* stop is called even after start failed :-( */
329{ 689 if (iter) {
330 struct gendisk *gp = v; 690 class_dev_iter_exit(iter);
331 struct device *dev; 691 kfree(iter);
332 ++*pos;
333 dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
334 if (dev) {
335 put_device(dev);
336 return dev_to_disk(dev);
337 } 692 }
338 return NULL;
339} 693}
340 694
341static void part_stop(struct seq_file *part, void *v) 695static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
342{ 696{
343 mutex_unlock(&block_class_lock); 697 static void *p;
698
699 p = disk_seqf_start(seqf, pos);
700 if (!IS_ERR(p) && p && !*pos)
701 seq_puts(seqf, "major minor #blocks name\n\n");
702 return p;
344} 703}
345 704
346static int show_partition(struct seq_file *part, void *v) 705static int show_partition(struct seq_file *seqf, void *v)
347{ 706{
348 struct gendisk *sgp = v; 707 struct gendisk *sgp = v;
349 int n; 708 struct disk_part_iter piter;
709 struct hd_struct *part;
350 char buf[BDEVNAME_SIZE]; 710 char buf[BDEVNAME_SIZE];
351 711
352 /*
353 * Print header if start told us to do. This is to preserve
354 * the original behavior of not printing header if no
355 * partition exists. This hackery will be removed later with
356 * class iteration clean up.
357 */
358 if (part->private) {
359 seq_puts(part, "major minor #blocks name\n\n");
360 part->private = NULL;
361 }
362
363 /* Don't show non-partitionable removable devices or empty devices */ 712 /* Don't show non-partitionable removable devices or empty devices */
364 if (!get_capacity(sgp) || 713 if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
365 (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) 714 (sgp->flags & GENHD_FL_REMOVABLE)))
366 return 0; 715 return 0;
367 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) 716 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
368 return 0; 717 return 0;
369 718
370 /* show the full disk and all non-0 size partitions of it */ 719 /* show the full disk and all non-0 size partitions of it */
371 seq_printf(part, "%4d %4d %10llu %s\n", 720 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
372 sgp->major, sgp->first_minor, 721 while ((part = disk_part_iter_next(&piter)))
373 (unsigned long long)get_capacity(sgp) >> 1, 722 seq_printf(seqf, "%4d %7d %10llu %s\n",
374 disk_name(sgp, 0, buf)); 723 MAJOR(part_devt(part)), MINOR(part_devt(part)),
375 for (n = 0; n < sgp->minors - 1; n++) { 724 (unsigned long long)part->nr_sects >> 1,
376 if (!sgp->part[n]) 725 disk_name(sgp, part->partno, buf));
377 continue; 726 disk_part_iter_exit(&piter);
378 if (sgp->part[n]->nr_sects == 0)
379 continue;
380 seq_printf(part, "%4d %4d %10llu %s\n",
381 sgp->major, n + 1 + sgp->first_minor,
382 (unsigned long long)sgp->part[n]->nr_sects >> 1 ,
383 disk_name(sgp, n + 1, buf));
384 }
385 727
386 return 0; 728 return 0;
387} 729}
388 730
389const struct seq_operations partitions_op = { 731const struct seq_operations partitions_op = {
390 .start = part_start, 732 .start = show_partition_start,
391 .next = part_next, 733 .next = disk_seqf_next,
392 .stop = part_stop, 734 .stop = disk_seqf_stop,
393 .show = show_partition 735 .show = show_partition
394}; 736};
395#endif 737#endif
396 738
397 739
398static struct kobject *base_probe(dev_t devt, int *part, void *data) 740static struct kobject *base_probe(dev_t devt, int *partno, void *data)
399{ 741{
400 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 742 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
401 /* Make old-style 2.4 aliases work */ 743 /* Make old-style 2.4 aliases work */
@@ -431,29 +773,29 @@ static ssize_t disk_range_show(struct device *dev,
431 return sprintf(buf, "%d\n", disk->minors); 773 return sprintf(buf, "%d\n", disk->minors);
432} 774}
433 775
434static ssize_t disk_removable_show(struct device *dev, 776static ssize_t disk_ext_range_show(struct device *dev,
435 struct device_attribute *attr, char *buf) 777 struct device_attribute *attr, char *buf)
436{ 778{
437 struct gendisk *disk = dev_to_disk(dev); 779 struct gendisk *disk = dev_to_disk(dev);
438 780
439 return sprintf(buf, "%d\n", 781 return sprintf(buf, "%d\n", disk_max_parts(disk));
440 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
441} 782}
442 783
443static ssize_t disk_ro_show(struct device *dev, 784static ssize_t disk_removable_show(struct device *dev,
444 struct device_attribute *attr, char *buf) 785 struct device_attribute *attr, char *buf)
445{ 786{
446 struct gendisk *disk = dev_to_disk(dev); 787 struct gendisk *disk = dev_to_disk(dev);
447 788
448 return sprintf(buf, "%d\n", disk->policy ? 1 : 0); 789 return sprintf(buf, "%d\n",
790 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
449} 791}
450 792
451static ssize_t disk_size_show(struct device *dev, 793static ssize_t disk_ro_show(struct device *dev,
452 struct device_attribute *attr, char *buf) 794 struct device_attribute *attr, char *buf)
453{ 795{
454 struct gendisk *disk = dev_to_disk(dev); 796 struct gendisk *disk = dev_to_disk(dev);
455 797
456 return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk)); 798 return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
457} 799}
458 800
459static ssize_t disk_capability_show(struct device *dev, 801static ssize_t disk_capability_show(struct device *dev,
@@ -464,73 +806,26 @@ static ssize_t disk_capability_show(struct device *dev,
464 return sprintf(buf, "%x\n", disk->flags); 806 return sprintf(buf, "%x\n", disk->flags);
465} 807}
466 808
467static ssize_t disk_stat_show(struct device *dev,
468 struct device_attribute *attr, char *buf)
469{
470 struct gendisk *disk = dev_to_disk(dev);
471
472 preempt_disable();
473 disk_round_stats(disk);
474 preempt_enable();
475 return sprintf(buf,
476 "%8lu %8lu %8llu %8u "
477 "%8lu %8lu %8llu %8u "
478 "%8u %8u %8u"
479 "\n",
480 disk_stat_read(disk, ios[READ]),
481 disk_stat_read(disk, merges[READ]),
482 (unsigned long long)disk_stat_read(disk, sectors[READ]),
483 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
484 disk_stat_read(disk, ios[WRITE]),
485 disk_stat_read(disk, merges[WRITE]),
486 (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
487 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
488 disk->in_flight,
489 jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
490 jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
491}
492
493#ifdef CONFIG_FAIL_MAKE_REQUEST
494static ssize_t disk_fail_show(struct device *dev,
495 struct device_attribute *attr, char *buf)
496{
497 struct gendisk *disk = dev_to_disk(dev);
498
499 return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
500}
501
502static ssize_t disk_fail_store(struct device *dev,
503 struct device_attribute *attr,
504 const char *buf, size_t count)
505{
506 struct gendisk *disk = dev_to_disk(dev);
507 int i;
508
509 if (count > 0 && sscanf(buf, "%d", &i) > 0) {
510 if (i == 0)
511 disk->flags &= ~GENHD_FL_FAIL;
512 else
513 disk->flags |= GENHD_FL_FAIL;
514 }
515
516 return count;
517}
518
519#endif
520
521static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 809static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
810static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
522static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 811static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
523static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); 812static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
524static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); 813static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
525static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 814static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
526static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); 815static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
527#ifdef CONFIG_FAIL_MAKE_REQUEST 816#ifdef CONFIG_FAIL_MAKE_REQUEST
528static struct device_attribute dev_attr_fail = 817static struct device_attribute dev_attr_fail =
529 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store); 818 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
819#endif
820#ifdef CONFIG_FAIL_IO_TIMEOUT
821static struct device_attribute dev_attr_fail_timeout =
822 __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
823 part_timeout_store);
530#endif 824#endif
531 825
532static struct attribute *disk_attrs[] = { 826static struct attribute *disk_attrs[] = {
533 &dev_attr_range.attr, 827 &dev_attr_range.attr,
828 &dev_attr_ext_range.attr,
534 &dev_attr_removable.attr, 829 &dev_attr_removable.attr,
535 &dev_attr_ro.attr, 830 &dev_attr_ro.attr,
536 &dev_attr_size.attr, 831 &dev_attr_size.attr,
@@ -539,6 +834,9 @@ static struct attribute *disk_attrs[] = {
539#ifdef CONFIG_FAIL_MAKE_REQUEST 834#ifdef CONFIG_FAIL_MAKE_REQUEST
540 &dev_attr_fail.attr, 835 &dev_attr_fail.attr,
541#endif 836#endif
837#ifdef CONFIG_FAIL_IO_TIMEOUT
838 &dev_attr_fail_timeout.attr,
839#endif
542 NULL 840 NULL
543}; 841};
544 842
@@ -551,13 +849,87 @@ static struct attribute_group *disk_attr_groups[] = {
551 NULL 849 NULL
552}; 850};
553 851
852static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
853{
854 struct disk_part_tbl *ptbl =
855 container_of(head, struct disk_part_tbl, rcu_head);
856
857 kfree(ptbl);
858}
859
860/**
861 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
862 * @disk: disk to replace part_tbl for
863 * @new_ptbl: new part_tbl to install
864 *
865 * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The
866 * original ptbl is freed using RCU callback.
867 *
868 * LOCKING:
869 * Matching bd_mutex locked.
870 */
871static void disk_replace_part_tbl(struct gendisk *disk,
872 struct disk_part_tbl *new_ptbl)
873{
874 struct disk_part_tbl *old_ptbl = disk->part_tbl;
875
876 rcu_assign_pointer(disk->part_tbl, new_ptbl);
877 if (old_ptbl)
878 call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
879}
880
881/**
882 * disk_expand_part_tbl - expand disk->part_tbl
883 * @disk: disk to expand part_tbl for
884 * @partno: expand so that this partno can fit in the table
885 *
886 * Expand disk->part_tbl so that @partno fits in it. disk->part_tbl
887 * uses RCU to allow unlocked dereferencing for stats and other stuff.
888 *
889 * LOCKING:
890 * Matching bd_mutex locked, might sleep.
891 *
892 * RETURNS:
893 * 0 on success, -errno on failure.
894 */
895int disk_expand_part_tbl(struct gendisk *disk, int partno)
896{
897 struct disk_part_tbl *old_ptbl = disk->part_tbl;
898 struct disk_part_tbl *new_ptbl;
899 int len = old_ptbl ? old_ptbl->len : 0;
900 int target = partno + 1;
901 size_t size;
902 int i;
903
904 /* disk_max_parts() is zero during initialization, ignore if so */
905 if (disk_max_parts(disk) && target > disk_max_parts(disk))
906 return -EINVAL;
907
908 if (target <= len)
909 return 0;
910
911 size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
912 new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
913 if (!new_ptbl)
914 return -ENOMEM;
915
916 INIT_RCU_HEAD(&new_ptbl->rcu_head);
917 new_ptbl->len = target;
918
919 for (i = 0; i < len; i++)
920 rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
921
922 disk_replace_part_tbl(disk, new_ptbl);
923 return 0;
924}
925
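A sketch of the expected calling pattern, on the assumption that the caller is partition code holding the matching bd_mutex as the LOCKING note requires:

static int example_install_part(struct gendisk *disk, int partno,
				struct hd_struct *part)
{
	int err = disk_expand_part_tbl(disk, partno);

	if (err)
		return err;
	rcu_assign_pointer(disk->part_tbl->part[partno], part);
	return 0;
}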
554static void disk_release(struct device *dev) 926static void disk_release(struct device *dev)
555{ 927{
556 struct gendisk *disk = dev_to_disk(dev); 928 struct gendisk *disk = dev_to_disk(dev);
557 929
558 kfree(disk->random); 930 kfree(disk->random);
559 kfree(disk->part); 931 disk_replace_part_tbl(disk, NULL);
560 free_disk_stats(disk); 932 free_part_stats(&disk->part0);
561 kfree(disk); 933 kfree(disk);
562} 934}
563struct class block_class = { 935struct class block_class = {
@@ -578,83 +950,31 @@ static struct device_type disk_type = {
578 * The output looks suspiciously like /proc/partitions with a bunch of 950 * The output looks suspiciously like /proc/partitions with a bunch of
579 * extra fields. 951 * extra fields.
580 */ 952 */
581 953static int diskstats_show(struct seq_file *seqf, void *v)
582static void *diskstats_start(struct seq_file *part, loff_t *pos)
583{
584 struct device *dev;
585 loff_t k = *pos;
586
587 mutex_lock(&block_class_lock);
588 dev = class_find_device(&block_class, NULL, &k, find_start);
589 if (dev) {
590 put_device(dev);
591 return dev_to_disk(dev);
592 }
593 return NULL;
594}
595
596static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos)
597{
598 struct gendisk *gp = v;
599 struct device *dev;
600
601 ++*pos;
602 dev = class_find_device(&block_class, &gp->dev, NULL, find_next);
603 if (dev) {
604 put_device(dev);
605 return dev_to_disk(dev);
606 }
607 return NULL;
608}
609
610static void diskstats_stop(struct seq_file *part, void *v)
611{
612 mutex_unlock(&block_class_lock);
613}
614
615static int diskstats_show(struct seq_file *s, void *v)
616{ 954{
617 struct gendisk *gp = v; 955 struct gendisk *gp = v;
956 struct disk_part_iter piter;
957 struct hd_struct *hd;
618 char buf[BDEVNAME_SIZE]; 958 char buf[BDEVNAME_SIZE];
619 int n = 0; 959 int cpu;
620 960
621 /* 961 /*
622 if (&gp->dev.kobj.entry == block_class.devices.next) 962 if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
623 seq_puts(s, "major minor name" 963 seq_puts(seqf, "major minor name"
624 " rio rmerge rsect ruse wio wmerge " 964 " rio rmerge rsect ruse wio wmerge "
625 "wsect wuse running use aveq" 965 "wsect wuse running use aveq"
626 "\n\n"); 966 "\n\n");
627 */ 967 */
628 968
629 preempt_disable(); 969 disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0);
630 disk_round_stats(gp); 970 while ((hd = disk_part_iter_next(&piter))) {
631 preempt_enable(); 971 cpu = part_stat_lock();
632 seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", 972 part_round_stats(cpu, hd);
633 gp->major, n + gp->first_minor, disk_name(gp, n, buf), 973 part_stat_unlock();
634 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), 974 seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
635 (unsigned long long)disk_stat_read(gp, sectors[0]),
636 jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
637 disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
638 (unsigned long long)disk_stat_read(gp, sectors[1]),
639 jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
640 gp->in_flight,
641 jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
642 jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
643
644 /* now show all non-0 size partitions of it */
645 for (n = 0; n < gp->minors - 1; n++) {
646 struct hd_struct *hd = gp->part[n];
647
648 if (!hd || !hd->nr_sects)
649 continue;
650
651 preempt_disable();
652 part_round_stats(hd);
653 preempt_enable();
654 seq_printf(s, "%4d %4d %s %lu %lu %llu "
655 "%u %lu %lu %llu %u %u %u %u\n", 975 "%u %lu %lu %llu %u %u %u %u\n",
656 gp->major, n + gp->first_minor + 1, 976 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
657 disk_name(gp, n + 1, buf), 977 disk_name(gp, hd->partno, buf),
658 part_stat_read(hd, ios[0]), 978 part_stat_read(hd, ios[0]),
659 part_stat_read(hd, merges[0]), 979 part_stat_read(hd, merges[0]),
660 (unsigned long long)part_stat_read(hd, sectors[0]), 980 (unsigned long long)part_stat_read(hd, sectors[0]),
@@ -668,14 +988,15 @@ static int diskstats_show(struct seq_file *s, void *v)
668 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 988 jiffies_to_msecs(part_stat_read(hd, time_in_queue))
669 ); 989 );
670 } 990 }
991 disk_part_iter_exit(&piter);
671 992
672 return 0; 993 return 0;
673} 994}
674 995
675const struct seq_operations diskstats_op = { 996const struct seq_operations diskstats_op = {
676 .start = diskstats_start, 997 .start = disk_seqf_start,
677 .next = diskstats_next, 998 .next = disk_seqf_next,
678 .stop = diskstats_stop, 999 .stop = disk_seqf_stop,
679 .show = diskstats_show 1000 .show = diskstats_show
680}; 1001};
681#endif /* CONFIG_PROC_FS */ 1002#endif /* CONFIG_PROC_FS */
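The rewritten diskstats_show() above leans on the two new idioms this series introduces: disk_part_iter_*() pins each hd_struct across the loop body, and part_stat_lock()/part_stat_unlock() bracket the per-CPU accounting. A minimal sketch of the pattern, assuming a hypothetical caller named example_dump():

#include <linux/genhd.h>

static void example_dump(struct gendisk *disk)
{
	struct disk_part_iter piter;
	struct hd_struct *part;
	int cpu;

	/* DISK_PITER_INCL_PART0 folds the whole-disk stats into the walk */
	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
	while ((part = disk_part_iter_next(&piter))) {
		cpu = part_stat_lock();		/* pins a CPU for the counters */
		part_round_stats(cpu, part);
		part_stat_unlock();
	}
	disk_part_iter_exit(&piter);		/* drops the last part reference */
}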
@@ -690,7 +1011,7 @@ static void media_change_notify_thread(struct work_struct *work)
690 * set environment vars to indicate which event this is for 1011 * set environment vars to indicate which event this is for
691 * so that user space will know to go check the media status. 1012 * so that user space will know to go check the media status.
692 */ 1013 */
693 kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); 1014 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
694 put_device(gd->driverfs_dev); 1015 put_device(gd->driverfs_dev);
695} 1016}
696 1017
@@ -703,42 +1024,29 @@ void genhd_media_change_notify(struct gendisk *disk)
703EXPORT_SYMBOL_GPL(genhd_media_change_notify); 1024EXPORT_SYMBOL_GPL(genhd_media_change_notify);
704#endif /* 0 */ 1025#endif /* 0 */
705 1026
706struct find_block { 1027dev_t blk_lookup_devt(const char *name, int partno)
707 const char *name;
708 int part;
709};
710
711static int match_id(struct device *dev, void *data)
712{ 1028{
713 struct find_block *find = data; 1029 dev_t devt = MKDEV(0, 0);
1030 struct class_dev_iter iter;
1031 struct device *dev;
714 1032
715 if (dev->type != &disk_type) 1033 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
716 return 0; 1034 while ((dev = class_dev_iter_next(&iter))) {
717 if (strcmp(dev->bus_id, find->name) == 0) {
718 struct gendisk *disk = dev_to_disk(dev); 1035 struct gendisk *disk = dev_to_disk(dev);
719 if (find->part < disk->minors) 1036 struct hd_struct *part;
720 return 1;
721 }
722 return 0;
723}
724 1037
725dev_t blk_lookup_devt(const char *name, int part) 1038 if (strcmp(dev->bus_id, name))
726{ 1039 continue;
727 struct device *dev;
728 dev_t devt = MKDEV(0, 0);
729 struct find_block find;
730 1040
731 mutex_lock(&block_class_lock); 1041 part = disk_get_part(disk, partno);
732 find.name = name; 1042 if (part) {
733 find.part = part; 1043 devt = part_devt(part);
734 dev = class_find_device(&block_class, NULL, &find, match_id); 1044 disk_put_part(part);
735 if (dev) { 1045 break;
736 put_device(dev); 1046 }
737 devt = MKDEV(MAJOR(dev->devt), 1047 disk_put_part(part);
738 MINOR(dev->devt) + part);
739 } 1048 }
740 mutex_unlock(&block_class_lock); 1049 class_dev_iter_exit(&iter);
741
742 return devt; 1050 return devt;
743} 1051}
744EXPORT_SYMBOL(blk_lookup_devt); 1052EXPORT_SYMBOL(blk_lookup_devt);
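With the class iterator in place, blk_lookup_devt() resolves a name/partition pair without assuming partitions occupy consecutive minors. A minimal sketch of a hypothetical caller; "sda" and partition 1 are illustrative, and MKDEV(0, 0) signals no match:

#include <linux/genhd.h>
#include <linux/kernel.h>

static void example_lookup(void)
{
	dev_t devt = blk_lookup_devt("sda", 1);	/* hypothetical query */

	if (devt != MKDEV(0, 0))
		printk(KERN_INFO "sda1 is %d:%d\n", MAJOR(devt), MINOR(devt));
}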
@@ -747,6 +1055,7 @@ struct gendisk *alloc_disk(int minors)
747{ 1055{
748 return alloc_disk_node(minors, -1); 1056 return alloc_disk_node(minors, -1);
749} 1057}
1058EXPORT_SYMBOL(alloc_disk);
750 1059
751struct gendisk *alloc_disk_node(int minors, int node_id) 1060struct gendisk *alloc_disk_node(int minors, int node_id)
752{ 1061{
@@ -755,32 +1064,28 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
755 disk = kmalloc_node(sizeof(struct gendisk), 1064 disk = kmalloc_node(sizeof(struct gendisk),
756 GFP_KERNEL | __GFP_ZERO, node_id); 1065 GFP_KERNEL | __GFP_ZERO, node_id);
757 if (disk) { 1066 if (disk) {
758 if (!init_disk_stats(disk)) { 1067 if (!init_part_stats(&disk->part0)) {
759 kfree(disk); 1068 kfree(disk);
760 return NULL; 1069 return NULL;
761 } 1070 }
762 if (minors > 1) { 1071 if (disk_expand_part_tbl(disk, 0)) {
763 int size = (minors - 1) * sizeof(struct hd_struct *); 1072 free_part_stats(&disk->part0);
764 disk->part = kmalloc_node(size, 1073 kfree(disk);
765 GFP_KERNEL | __GFP_ZERO, node_id); 1074 return NULL;
766 if (!disk->part) {
767 free_disk_stats(disk);
768 kfree(disk);
769 return NULL;
770 }
771 } 1075 }
1076 disk->part_tbl->part[0] = &disk->part0;
1077
772 disk->minors = minors; 1078 disk->minors = minors;
773 rand_initialize_disk(disk); 1079 rand_initialize_disk(disk);
774 disk->dev.class = &block_class; 1080 disk_to_dev(disk)->class = &block_class;
775 disk->dev.type = &disk_type; 1081 disk_to_dev(disk)->type = &disk_type;
776 device_initialize(&disk->dev); 1082 device_initialize(disk_to_dev(disk));
777 INIT_WORK(&disk->async_notify, 1083 INIT_WORK(&disk->async_notify,
778 media_change_notify_thread); 1084 media_change_notify_thread);
1085 disk->node_id = node_id;
779 } 1086 }
780 return disk; 1087 return disk;
781} 1088}
782
783EXPORT_SYMBOL(alloc_disk);
784EXPORT_SYMBOL(alloc_disk_node); 1089EXPORT_SYMBOL(alloc_disk_node);
785 1090
786struct kobject *get_disk(struct gendisk *disk) 1091struct kobject *get_disk(struct gendisk *disk)
@@ -793,7 +1098,7 @@ struct kobject *get_disk(struct gendisk *disk)
793 owner = disk->fops->owner; 1098 owner = disk->fops->owner;
794 if (owner && !try_module_get(owner)) 1099 if (owner && !try_module_get(owner))
795 return NULL; 1100 return NULL;
796 kobj = kobject_get(&disk->dev.kobj); 1101 kobj = kobject_get(&disk_to_dev(disk)->kobj);
797 if (kobj == NULL) { 1102 if (kobj == NULL) {
798 module_put(owner); 1103 module_put(owner);
799 return NULL; 1104 return NULL;
@@ -807,27 +1112,28 @@ EXPORT_SYMBOL(get_disk);
807void put_disk(struct gendisk *disk) 1112void put_disk(struct gendisk *disk)
808{ 1113{
809 if (disk) 1114 if (disk)
810 kobject_put(&disk->dev.kobj); 1115 kobject_put(&disk_to_dev(disk)->kobj);
811} 1116}
812 1117
813EXPORT_SYMBOL(put_disk); 1118EXPORT_SYMBOL(put_disk);
814 1119
815void set_device_ro(struct block_device *bdev, int flag) 1120void set_device_ro(struct block_device *bdev, int flag)
816{ 1121{
817 if (bdev->bd_contains != bdev) 1122 bdev->bd_part->policy = flag;
818 bdev->bd_part->policy = flag;
819 else
820 bdev->bd_disk->policy = flag;
821} 1123}
822 1124
823EXPORT_SYMBOL(set_device_ro); 1125EXPORT_SYMBOL(set_device_ro);
824 1126
825void set_disk_ro(struct gendisk *disk, int flag) 1127void set_disk_ro(struct gendisk *disk, int flag)
826{ 1128{
827 int i; 1129 struct disk_part_iter piter;
828 disk->policy = flag; 1130 struct hd_struct *part;
829 for (i = 0; i < disk->minors - 1; i++) 1131
830 if (disk->part[i]) disk->part[i]->policy = flag; 1132 disk_part_iter_init(&piter, disk,
1133 DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
1134 while ((part = disk_part_iter_next(&piter)))
1135 part->policy = flag;
1136 disk_part_iter_exit(&piter);
831} 1137}
832 1138
833EXPORT_SYMBOL(set_disk_ro); 1139EXPORT_SYMBOL(set_disk_ro);
@@ -836,18 +1142,15 @@ int bdev_read_only(struct block_device *bdev)
836{ 1142{
837 if (!bdev) 1143 if (!bdev)
838 return 0; 1144 return 0;
839 else if (bdev->bd_contains != bdev) 1145 return bdev->bd_part->policy;
840 return bdev->bd_part->policy;
841 else
842 return bdev->bd_disk->policy;
843} 1146}
844 1147
845EXPORT_SYMBOL(bdev_read_only); 1148EXPORT_SYMBOL(bdev_read_only);
846 1149
847int invalidate_partition(struct gendisk *disk, int index) 1150int invalidate_partition(struct gendisk *disk, int partno)
848{ 1151{
849 int res = 0; 1152 int res = 0;
850 struct block_device *bdev = bdget_disk(disk, index); 1153 struct block_device *bdev = bdget_disk(disk, partno);
851 if (bdev) { 1154 if (bdev) {
852 fsync_bdev(bdev); 1155 fsync_bdev(bdev);
853 res = __invalidate_device(bdev); 1156 res = __invalidate_device(bdev);
diff --git a/block/ioctl.c b/block/ioctl.c
index 77185e5c026a..38bee321e1fa 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
12{ 12{
13 struct block_device *bdevp; 13 struct block_device *bdevp;
14 struct gendisk *disk; 14 struct gendisk *disk;
15 struct hd_struct *part;
15 struct blkpg_ioctl_arg a; 16 struct blkpg_ioctl_arg a;
16 struct blkpg_partition p; 17 struct blkpg_partition p;
18 struct disk_part_iter piter;
17 long long start, length; 19 long long start, length;
18 int part; 20 int partno;
19 int i;
20 int err; 21 int err;
21 22
22 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
@@ -28,8 +29,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
28 disk = bdev->bd_disk; 29 disk = bdev->bd_disk;
29 if (bdev != bdev->bd_contains) 30 if (bdev != bdev->bd_contains)
30 return -EINVAL; 31 return -EINVAL;
31 part = p.pno; 32 partno = p.pno;
32 if (part <= 0 || part >= disk->minors) 33 if (partno <= 0)
33 return -EINVAL; 34 return -EINVAL;
34 switch (a.op) { 35 switch (a.op) {
35 case BLKPG_ADD_PARTITION: 36 case BLKPG_ADD_PARTITION:
@@ -43,36 +44,37 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
43 || pstart < 0 || plength < 0) 44 || pstart < 0 || plength < 0)
44 return -EINVAL; 45 return -EINVAL;
45 } 46 }
46 /* partition number in use? */ 47
47 mutex_lock(&bdev->bd_mutex); 48 mutex_lock(&bdev->bd_mutex);
48 if (disk->part[part - 1]) {
49 mutex_unlock(&bdev->bd_mutex);
50 return -EBUSY;
51 }
52 /* overlap? */
53 for (i = 0; i < disk->minors - 1; i++) {
54 struct hd_struct *s = disk->part[i];
55 49
56 if (!s) 50 /* overlap? */
57 continue; 51 disk_part_iter_init(&piter, disk,
58 if (!(start+length <= s->start_sect || 52 DISK_PITER_INCL_EMPTY);
59 start >= s->start_sect + s->nr_sects)) { 53 while ((part = disk_part_iter_next(&piter))) {
54 if (!(start + length <= part->start_sect ||
55 start >= part->start_sect + part->nr_sects)) {
56 disk_part_iter_exit(&piter);
60 mutex_unlock(&bdev->bd_mutex); 57 mutex_unlock(&bdev->bd_mutex);
61 return -EBUSY; 58 return -EBUSY;
62 } 59 }
63 } 60 }
61 disk_part_iter_exit(&piter);
62
64 /* all seems OK */ 63 /* all seems OK */
65 err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE); 64 err = add_partition(disk, partno, start, length,
65 ADDPART_FLAG_NONE);
66 mutex_unlock(&bdev->bd_mutex); 66 mutex_unlock(&bdev->bd_mutex);
67 return err; 67 return err;
68 case BLKPG_DEL_PARTITION: 68 case BLKPG_DEL_PARTITION:
69 if (!disk->part[part-1]) 69 part = disk_get_part(disk, partno);
70 return -ENXIO; 70 if (!part)
71 if (disk->part[part - 1]->nr_sects == 0)
72 return -ENXIO; 71 return -ENXIO;
73 bdevp = bdget_disk(disk, part); 72
73 bdevp = bdget(part_devt(part));
74 disk_put_part(part);
74 if (!bdevp) 75 if (!bdevp)
75 return -ENOMEM; 76 return -ENOMEM;
77
76 mutex_lock(&bdevp->bd_mutex); 78 mutex_lock(&bdevp->bd_mutex);
77 if (bdevp->bd_openers) { 79 if (bdevp->bd_openers) {
78 mutex_unlock(&bdevp->bd_mutex); 80 mutex_unlock(&bdevp->bd_mutex);
@@ -84,7 +86,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
84 invalidate_bdev(bdevp); 86 invalidate_bdev(bdevp);
85 87
86 mutex_lock_nested(&bdev->bd_mutex, 1); 88 mutex_lock_nested(&bdev->bd_mutex, 1);
87 delete_partition(disk, part); 89 delete_partition(disk, partno);
88 mutex_unlock(&bdev->bd_mutex); 90 mutex_unlock(&bdev->bd_mutex);
89 mutex_unlock(&bdevp->bd_mutex); 91 mutex_unlock(&bdevp->bd_mutex);
90 bdput(bdevp); 92 bdput(bdevp);
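For context, the structures this ioctl consumes come from <linux/blkpg.h>, with start and length given in bytes, and the caller needs CAP_SYS_ADMIN per the check above. A hedged userspace sketch; the device path, offsets, and partition number are illustrative assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/blkpg.h>

int main(void)
{
	struct blkpg_partition p = {
		.start  = 1 << 20,		/* byte offset of the new partition */
		.length = 64 << 20,		/* size in bytes */
		.pno    = 1,			/* partition number, must be > 0 */
	};
	struct blkpg_ioctl_arg a = {
		.op      = BLKPG_ADD_PARTITION,
		.datalen = sizeof(p),
		.data    = &p,
	};
	int fd = open("/dev/sdx", O_RDWR);	/* illustrative path */

	if (fd < 0 || ioctl(fd, BLKPG, &a) < 0)
		perror("BLKPG_ADD_PARTITION");
	return 0;
}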
@@ -100,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev)
100 struct gendisk *disk = bdev->bd_disk; 102 struct gendisk *disk = bdev->bd_disk;
101 int res; 103 int res;
102 104
103 if (disk->minors == 1 || bdev != bdev->bd_contains) 105 if (!disk_partitionable(disk) || bdev != bdev->bd_contains)
104 return -EINVAL; 106 return -EINVAL;
105 if (!capable(CAP_SYS_ADMIN)) 107 if (!capable(CAP_SYS_ADMIN))
106 return -EACCES; 108 return -EACCES;
@@ -111,6 +113,69 @@ static int blkdev_reread_part(struct block_device *bdev)
111 return res; 113 return res;
112} 114}
113 115
116static void blk_ioc_discard_endio(struct bio *bio, int err)
117{
118 if (err) {
119 if (err == -EOPNOTSUPP)
120 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
121 clear_bit(BIO_UPTODATE, &bio->bi_flags);
122 }
123 complete(bio->bi_private);
124}
125
126static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
127 uint64_t len)
128{
129 struct request_queue *q = bdev_get_queue(bdev);
130 int ret = 0;
131
132 if (start & 511)
133 return -EINVAL;
134 if (len & 511)
135 return -EINVAL;
136 start >>= 9;
137 len >>= 9;
138
139 if (start + len > (bdev->bd_inode->i_size >> 9))
140 return -EINVAL;
141
142 if (!q->prepare_discard_fn)
143 return -EOPNOTSUPP;
144
145 while (len && !ret) {
146 DECLARE_COMPLETION_ONSTACK(wait);
147 struct bio *bio;
148
149 bio = bio_alloc(GFP_KERNEL, 0);
150 if (!bio)
151 return -ENOMEM;
152
153 bio->bi_end_io = blk_ioc_discard_endio;
154 bio->bi_bdev = bdev;
155 bio->bi_private = &wait;
156 bio->bi_sector = start;
157
158 if (len > q->max_hw_sectors) {
159 bio->bi_size = q->max_hw_sectors << 9;
160 len -= q->max_hw_sectors;
161 start += q->max_hw_sectors;
162 } else {
163 bio->bi_size = len << 9;
164 len = 0;
165 }
166 submit_bio(DISCARD_NOBARRIER, bio);
167
168 wait_for_completion(&wait);
169
170 if (bio_flagged(bio, BIO_EOPNOTSUPP))
171 ret = -EOPNOTSUPP;
172 else if (!bio_flagged(bio, BIO_UPTODATE))
173 ret = -EIO;
174 bio_put(bio);
175 }
176 return ret;
177}
178
114static int put_ushort(unsigned long arg, unsigned short val) 179static int put_ushort(unsigned long arg, unsigned short val)
115{ 180{
116 return put_user(val, (unsigned short __user *)arg); 181 return put_user(val, (unsigned short __user *)arg);
@@ -258,6 +323,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
258 set_device_ro(bdev, n); 323 set_device_ro(bdev, n);
259 unlock_kernel(); 324 unlock_kernel();
260 return 0; 325 return 0;
326
327 case BLKDISCARD: {
328 uint64_t range[2];
329
330 if (!(file->f_mode & FMODE_WRITE))
331 return -EBADF;
332
333 if (copy_from_user(range, (void __user *)arg, sizeof(range)))
334 return -EFAULT;
335
336 return blk_ioctl_discard(bdev, range[0], range[1]);
337 }
338
261 case HDIO_GETGEO: { 339 case HDIO_GETGEO: {
262 struct hd_geometry geo; 340 struct hd_geometry geo;
263 341
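The new BLKDISCARD case takes an {offset, length} pair in bytes, both 512-byte aligned per the checks in blk_ioctl_discard(), on a descriptor opened for writing. A hedged userspace sketch; the device path and range are illustrative:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* discard the first 1 MiB */
	int fd = open("/dev/sdx", O_WRONLY);	/* illustrative path */

	if (fd < 0 || ioctl(fd, BLKDISCARD, range) < 0)
		perror("BLKDISCARD");
	return 0;
}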
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ec4b7f234626..c34272a348fe 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -185,6 +185,7 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
185 __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok); 185 __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
186 __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok); 186 __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
187 __set_bit(GPCMD_SET_STREAMING, filter->write_ok); 187 __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
188 __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
188} 189}
189EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); 190EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
190 191
@@ -313,11 +314,12 @@ static int sg_io(struct file *file, struct request_queue *q,
313 goto out; 314 goto out;
314 } 315 }
315 316
316 ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, 317 ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count,
317 hdr->dxfer_len); 318 hdr->dxfer_len, GFP_KERNEL);
318 kfree(iov); 319 kfree(iov);
319 } else if (hdr->dxfer_len) 320 } else if (hdr->dxfer_len)
320 ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); 321 ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len,
322 GFP_KERNEL);
321 323
322 if (ret) 324 if (ret)
323 goto out; 325 goto out;
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index f2dd99122bd6..a93247cc395a 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -33,6 +33,7 @@
33 */ 33 */
34 34
35#include <linux/kernel.h> 35#include <linux/kernel.h>
36#include <linux/blkdev.h>
36#include <linux/pci.h> 37#include <linux/pci.h>
37#include <scsi/scsi.h> 38#include <scsi/scsi.h>
38#include <scsi/scsi_host.h> 39#include <scsi/scsi_host.h>
@@ -459,29 +460,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
459 * RETURNS: 460 * RETURNS:
460 * EH_HANDLED or EH_NOT_HANDLED 461 * EH_HANDLED or EH_NOT_HANDLED
461 */ 462 */
462enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 463enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
463{ 464{
464 struct Scsi_Host *host = cmd->device->host; 465 struct Scsi_Host *host = cmd->device->host;
465 struct ata_port *ap = ata_shost_to_port(host); 466 struct ata_port *ap = ata_shost_to_port(host);
466 unsigned long flags; 467 unsigned long flags;
467 struct ata_queued_cmd *qc; 468 struct ata_queued_cmd *qc;
468 enum scsi_eh_timer_return ret; 469 enum blk_eh_timer_return ret;
469 470
470 DPRINTK("ENTER\n"); 471 DPRINTK("ENTER\n");
471 472
472 if (ap->ops->error_handler) { 473 if (ap->ops->error_handler) {
473 ret = EH_NOT_HANDLED; 474 ret = BLK_EH_NOT_HANDLED;
474 goto out; 475 goto out;
475 } 476 }
476 477
477 ret = EH_HANDLED; 478 ret = BLK_EH_HANDLED;
478 spin_lock_irqsave(ap->lock, flags); 479 spin_lock_irqsave(ap->lock, flags);
479 qc = ata_qc_from_tag(ap, ap->link.active_tag); 480 qc = ata_qc_from_tag(ap, ap->link.active_tag);
480 if (qc) { 481 if (qc) {
481 WARN_ON(qc->scsicmd != cmd); 482 WARN_ON(qc->scsicmd != cmd);
482 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 483 qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
483 qc->err_mask |= AC_ERR_TIMEOUT; 484 qc->err_mask |= AC_ERR_TIMEOUT;
484 ret = EH_NOT_HANDLED; 485 ret = BLK_EH_NOT_HANDLED;
485 } 486 }
486 spin_unlock_irqrestore(ap->lock, flags); 487 spin_unlock_irqrestore(ap->lock, flags);
487 488
@@ -833,7 +834,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
833 * Note that ATA_QCFLAG_FAILED is unconditionally set after 834 * Note that ATA_QCFLAG_FAILED is unconditionally set after
834 * this function completes. 835 * this function completes.
835 */ 836 */
836 scsi_req_abort_cmd(qc->scsicmd); 837 blk_abort_request(qc->scsicmd->request);
837} 838}
838 839
839/** 840/**
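The EH_* values here give way to the block layer's new timeout API. A hedged sketch of the enum as assumed from this series; the authoritative definition lives in <linux/blkdev.h>:

/* sketch only; see <linux/blkdev.h> for the real definition */
enum blk_eh_timer_return {
	BLK_EH_NOT_HANDLED,	/* let the block layer escalate */
	BLK_EH_HANDLED,		/* driver has completed the request */
	BLK_EH_RESET_TIMER,	/* give the request more time */
};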
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index fccd5e496c62..59fe051957ef 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1085,6 +1085,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
1085 1085
1086 blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); 1086 blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN);
1087 } else { 1087 } else {
1088 if (ata_id_is_ssd(dev->id))
1089 queue_flag_set_unlocked(QUEUE_FLAG_NONROT,
1090 sdev->request_queue);
1091
1088 /* ATA devices must be sector aligned */ 1092 /* ATA devices must be sector aligned */
1089 blk_queue_update_dma_alignment(sdev->request_queue, 1093 blk_queue_update_dma_alignment(sdev->request_queue,
1090 ATA_SECT_SIZE - 1); 1094 ATA_SECT_SIZE - 1);
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index e96de96e3020..fe2839e58774 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -155,7 +155,7 @@ extern int ata_bus_probe(struct ata_port *ap);
155/* libata-eh.c */ 155/* libata-eh.c */
156extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); 156extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd);
157extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); 157extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd);
158extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); 158extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd);
159extern void ata_scsi_error(struct Scsi_Host *host); 159extern void ata_scsi_error(struct Scsi_Host *host);
160extern void ata_port_wait_eh(struct ata_port *ap); 160extern void ata_port_wait_eh(struct ata_port *ap);
161extern void ata_eh_fastdrain_timerfn(unsigned long arg); 161extern void ata_eh_fastdrain_timerfn(unsigned long arg);
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 31dc0cd84afa..0a5f055dffba 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -54,7 +54,7 @@ struct driver_private {
54 */ 54 */
55struct class_private { 55struct class_private {
56 struct kset class_subsys; 56 struct kset class_subsys;
57 struct list_head class_devices; 57 struct klist class_devices;
58 struct list_head class_interfaces; 58 struct list_head class_interfaces;
59 struct kset class_dirs; 59 struct kset class_dirs;
60 struct mutex class_mutex; 60 struct mutex class_mutex;
diff --git a/drivers/base/class.c b/drivers/base/class.c
index cc5e28c8885c..eb85e4312301 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -135,6 +135,20 @@ static void remove_class_attrs(struct class *cls)
135 } 135 }
136} 136}
137 137
138static void klist_class_dev_get(struct klist_node *n)
139{
140 struct device *dev = container_of(n, struct device, knode_class);
141
142 get_device(dev);
143}
144
145static void klist_class_dev_put(struct klist_node *n)
146{
147 struct device *dev = container_of(n, struct device, knode_class);
148
149 put_device(dev);
150}
151
138int __class_register(struct class *cls, struct lock_class_key *key) 152int __class_register(struct class *cls, struct lock_class_key *key)
139{ 153{
140 struct class_private *cp; 154 struct class_private *cp;
@@ -145,7 +159,7 @@ int __class_register(struct class *cls, struct lock_class_key *key)
145 cp = kzalloc(sizeof(*cp), GFP_KERNEL); 159 cp = kzalloc(sizeof(*cp), GFP_KERNEL);
146 if (!cp) 160 if (!cp)
147 return -ENOMEM; 161 return -ENOMEM;
148 INIT_LIST_HEAD(&cp->class_devices); 162 klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put);
149 INIT_LIST_HEAD(&cp->class_interfaces); 163 INIT_LIST_HEAD(&cp->class_interfaces);
150 kset_init(&cp->class_dirs); 164 kset_init(&cp->class_dirs);
151 __mutex_init(&cp->class_mutex, "struct class mutex", key); 165 __mutex_init(&cp->class_mutex, "struct class mutex", key);
@@ -269,6 +283,71 @@ char *make_class_name(const char *name, struct kobject *kobj)
269#endif 283#endif
270 284
271/** 285/**
286 * class_dev_iter_init - initialize class device iterator
287 * @iter: class iterator to initialize
288 * @class: the class we want to iterate over
289 * @start: the device to start iterating from, if any
290 * @type: device_type of the devices to iterate over, NULL for all
291 *
292 * Initialize class iterator @iter such that it iterates over devices
293 * of @class. If @start is set, the list iteration will start there,
294 * otherwise the iteration starts at the beginning of
295 * the list.
296 */
297void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
298 struct device *start, const struct device_type *type)
299{
300 struct klist_node *start_knode = NULL;
301
302 if (start)
303 start_knode = &start->knode_class;
304 klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode);
305 iter->type = type;
306}
307EXPORT_SYMBOL_GPL(class_dev_iter_init);
308
309/**
310 * class_dev_iter_next - iterate to the next device
311 * @iter: class iterator to proceed
312 *
313 * Advance @iter to the next device and return it. Returns NULL if
314 * iteration is complete.
315 *
316 * The returned device is referenced and won't be released until the
317 * iterator is advanced to the next device or exited. The caller is
318 * free to do whatever it wants with the device, including
319 * calling back into class code.
320 */
321struct device *class_dev_iter_next(struct class_dev_iter *iter)
322{
323 struct klist_node *knode;
324 struct device *dev;
325
326 while (1) {
327 knode = klist_next(&iter->ki);
328 if (!knode)
329 return NULL;
330 dev = container_of(knode, struct device, knode_class);
331 if (!iter->type || iter->type == dev->type)
332 return dev;
333 }
334}
335EXPORT_SYMBOL_GPL(class_dev_iter_next);
336
337/**
338 * class_dev_iter_exit - finish iteration
339 * @iter: class iterator to finish
340 *
341 * Finish an iteration. Always call this function after iteration is
342 * complete, whether or not it ran to the end.
343 */
344void class_dev_iter_exit(struct class_dev_iter *iter)
345{
346 klist_iter_exit(&iter->ki);
347}
348EXPORT_SYMBOL_GPL(class_dev_iter_exit);
349
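Taken together, the three helpers give drivers a lockless walk in which each returned device stays pinned until the next call. A minimal sketch, assuming a hypothetical class pointer and per-device hook:

#include <linux/device.h>
#include <linux/kernel.h>

static void handle_one(struct device *dev)
{
	printk(KERN_INFO "visiting %s\n", dev->bus_id);	/* illustrative work */
}

static void walk_class(struct class *my_class)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, my_class, NULL, NULL);
	while ((dev = class_dev_iter_next(&iter)))
		handle_one(dev);	/* may sleep; dev stays referenced */
	class_dev_iter_exit(&iter);
}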
350/**
272 * class_for_each_device - device iterator 351 * class_for_each_device - device iterator
273 * @class: the class we're iterating 352 * @class: the class we're iterating
274 * @start: the device to start with in the list, if any. 353 * @start: the device to start with in the list, if any.
@@ -283,13 +362,13 @@ char *make_class_name(const char *name, struct kobject *kobj)
283 * We check the return of @fn each time. If it returns anything 362 * We check the return of @fn each time. If it returns anything
284 * other than 0, we break out and return that value. 363 * other than 0, we break out and return that value.
285 * 364 *
286 * Note, we hold class->class_mutex in this function, so it can not be 365 * @fn is allowed to do anything including calling back into class
287 * re-acquired in @fn, otherwise it will self-deadlocking. For 366 * code. There's no locking restriction.
288 * example, calls to add or remove class members would be verboten.
289 */ 367 */
290int class_for_each_device(struct class *class, struct device *start, 368int class_for_each_device(struct class *class, struct device *start,
291 void *data, int (*fn)(struct device *, void *)) 369 void *data, int (*fn)(struct device *, void *))
292{ 370{
371 struct class_dev_iter iter;
293 struct device *dev; 372 struct device *dev;
294 int error = 0; 373 int error = 0;
295 374
@@ -301,20 +380,13 @@ int class_for_each_device(struct class *class, struct device *start,
301 return -EINVAL; 380 return -EINVAL;
302 } 381 }
303 382
304 mutex_lock(&class->p->class_mutex); 383 class_dev_iter_init(&iter, class, start, NULL);
305 list_for_each_entry(dev, &class->p->class_devices, node) { 384 while ((dev = class_dev_iter_next(&iter))) {
306 if (start) {
307 if (start == dev)
308 start = NULL;
309 continue;
310 }
311 dev = get_device(dev);
312 error = fn(dev, data); 385 error = fn(dev, data);
313 put_device(dev);
314 if (error) 386 if (error)
315 break; 387 break;
316 } 388 }
317 mutex_unlock(&class->p->class_mutex); 389 class_dev_iter_exit(&iter);
318 390
319 return error; 391 return error;
320} 392}
@@ -337,16 +409,15 @@ EXPORT_SYMBOL_GPL(class_for_each_device);
337 * 409 *
338 * Note, you will need to drop the reference with put_device() after use. 410 * Note, you will need to drop the reference with put_device() after use.
339 * 411 *
340 * We hold class->class_mutex in this function, so it can not be 412 * @fn is allowed to do anything including calling back into class
341 * re-acquired in @match, otherwise it will self-deadlocking. For 413 * code. There's no locking restriction.
342 * example, calls to add or remove class members would be verboten.
343 */ 414 */
344struct device *class_find_device(struct class *class, struct device *start, 415struct device *class_find_device(struct class *class, struct device *start,
345 void *data, 416 void *data,
346 int (*match)(struct device *, void *)) 417 int (*match)(struct device *, void *))
347{ 418{
419 struct class_dev_iter iter;
348 struct device *dev; 420 struct device *dev;
349 int found = 0;
350 421
351 if (!class) 422 if (!class)
352 return NULL; 423 return NULL;
@@ -356,29 +427,23 @@ struct device *class_find_device(struct class *class, struct device *start,
356 return NULL; 427 return NULL;
357 } 428 }
358 429
359 mutex_lock(&class->p->class_mutex); 430 class_dev_iter_init(&iter, class, start, NULL);
360 list_for_each_entry(dev, &class->p->class_devices, node) { 431 while ((dev = class_dev_iter_next(&iter))) {
361 if (start) {
362 if (start == dev)
363 start = NULL;
364 continue;
365 }
366 dev = get_device(dev);
367 if (match(dev, data)) { 432 if (match(dev, data)) {
368 found = 1; 433 get_device(dev);
369 break; 434 break;
370 } else 435 }
371 put_device(dev);
372 } 436 }
373 mutex_unlock(&class->p->class_mutex); 437 class_dev_iter_exit(&iter);
374 438
375 return found ? dev : NULL; 439 return dev;
376} 440}
377EXPORT_SYMBOL_GPL(class_find_device); 441EXPORT_SYMBOL_GPL(class_find_device);
378 442
379int class_interface_register(struct class_interface *class_intf) 443int class_interface_register(struct class_interface *class_intf)
380{ 444{
381 struct class *parent; 445 struct class *parent;
446 struct class_dev_iter iter;
382 struct device *dev; 447 struct device *dev;
383 448
384 if (!class_intf || !class_intf->class) 449 if (!class_intf || !class_intf->class)
@@ -391,8 +456,10 @@ int class_interface_register(struct class_interface *class_intf)
391 mutex_lock(&parent->p->class_mutex); 456 mutex_lock(&parent->p->class_mutex);
392 list_add_tail(&class_intf->node, &parent->p->class_interfaces); 457 list_add_tail(&class_intf->node, &parent->p->class_interfaces);
393 if (class_intf->add_dev) { 458 if (class_intf->add_dev) {
394 list_for_each_entry(dev, &parent->p->class_devices, node) 459 class_dev_iter_init(&iter, parent, NULL, NULL);
460 while ((dev = class_dev_iter_next(&iter)))
395 class_intf->add_dev(dev, class_intf); 461 class_intf->add_dev(dev, class_intf);
462 class_dev_iter_exit(&iter);
396 } 463 }
397 mutex_unlock(&parent->p->class_mutex); 464 mutex_unlock(&parent->p->class_mutex);
398 465
@@ -402,6 +469,7 @@ int class_interface_register(struct class_interface *class_intf)
402void class_interface_unregister(struct class_interface *class_intf) 469void class_interface_unregister(struct class_interface *class_intf)
403{ 470{
404 struct class *parent = class_intf->class; 471 struct class *parent = class_intf->class;
472 struct class_dev_iter iter;
405 struct device *dev; 473 struct device *dev;
406 474
407 if (!parent) 475 if (!parent)
@@ -410,8 +478,10 @@ void class_interface_unregister(struct class_interface *class_intf)
410 mutex_lock(&parent->p->class_mutex); 478 mutex_lock(&parent->p->class_mutex);
411 list_del_init(&class_intf->node); 479 list_del_init(&class_intf->node);
412 if (class_intf->remove_dev) { 480 if (class_intf->remove_dev) {
413 list_for_each_entry(dev, &parent->p->class_devices, node) 481 class_dev_iter_init(&iter, parent, NULL, NULL);
482 while ((dev = class_dev_iter_next(&iter)))
414 class_intf->remove_dev(dev, class_intf); 483 class_intf->remove_dev(dev, class_intf);
484 class_dev_iter_exit(&iter);
415 } 485 }
416 mutex_unlock(&parent->p->class_mutex); 486 mutex_unlock(&parent->p->class_mutex);
417 487
diff --git a/drivers/base/core.c b/drivers/base/core.c
index d021c98605b3..b98cb1416a2d 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -536,7 +536,6 @@ void device_initialize(struct device *dev)
536 klist_init(&dev->klist_children, klist_children_get, 536 klist_init(&dev->klist_children, klist_children_get,
537 klist_children_put); 537 klist_children_put);
538 INIT_LIST_HEAD(&dev->dma_pools); 538 INIT_LIST_HEAD(&dev->dma_pools);
539 INIT_LIST_HEAD(&dev->node);
540 init_MUTEX(&dev->sem); 539 init_MUTEX(&dev->sem);
541 spin_lock_init(&dev->devres_lock); 540 spin_lock_init(&dev->devres_lock);
542 INIT_LIST_HEAD(&dev->devres_head); 541 INIT_LIST_HEAD(&dev->devres_head);
@@ -916,7 +915,8 @@ int device_add(struct device *dev)
916 if (dev->class) { 915 if (dev->class) {
917 mutex_lock(&dev->class->p->class_mutex); 916 mutex_lock(&dev->class->p->class_mutex);
918 /* tie the class to the device */ 917 /* tie the class to the device */
919 list_add_tail(&dev->node, &dev->class->p->class_devices); 918 klist_add_tail(&dev->knode_class,
919 &dev->class->p->class_devices);
920 920
921 /* notify any interfaces that the device is here */ 921 /* notify any interfaces that the device is here */
922 list_for_each_entry(class_intf, 922 list_for_each_entry(class_intf,
@@ -1032,7 +1032,7 @@ void device_del(struct device *dev)
1032 if (class_intf->remove_dev) 1032 if (class_intf->remove_dev)
1033 class_intf->remove_dev(dev, class_intf); 1033 class_intf->remove_dev(dev, class_intf);
1034 /* remove the device from the class list */ 1034 /* remove the device from the class list */
1035 list_del_init(&dev->node); 1035 klist_del(&dev->knode_class);
1036 mutex_unlock(&dev->class->p->class_mutex); 1036 mutex_unlock(&dev->class->p->class_mutex);
1037 } 1037 }
1038 device_remove_file(dev, &uevent_attr); 1038 device_remove_file(dev, &uevent_attr);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 0c39782b2660..aa69556c3485 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -109,12 +109,12 @@ static const struct attribute_group attr_group = {
109static int 109static int
110aoedisk_add_sysfs(struct aoedev *d) 110aoedisk_add_sysfs(struct aoedev *d)
111{ 111{
112 return sysfs_create_group(&d->gd->dev.kobj, &attr_group); 112 return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group);
113} 113}
114void 114void
115aoedisk_rm_sysfs(struct aoedev *d) 115aoedisk_rm_sysfs(struct aoedev *d)
116{ 116{
117 sysfs_remove_group(&d->gd->dev.kobj, &attr_group); 117 sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group);
118} 118}
119 119
120static int 120static int
@@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp)
276 gd->first_minor = d->sysminor * AOE_PARTITIONS; 276 gd->first_minor = d->sysminor * AOE_PARTITIONS;
277 gd->fops = &aoe_bdops; 277 gd->fops = &aoe_bdops;
278 gd->private_data = d; 278 gd->private_data = d;
279 gd->capacity = d->ssize; 279 set_capacity(gd, d->ssize);
280 snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", 280 snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d",
281 d->aoemajor, d->aoeminor); 281 d->aoemajor, d->aoeminor);
282 282
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 2f1746295d06..961d29a53cab 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work)
645 unsigned long flags; 645 unsigned long flags;
646 u64 ssize; 646 u64 ssize;
647 647
648 ssize = d->gd->capacity; 648 ssize = get_capacity(d->gd);
649 bd = bdget_disk(d->gd, 0); 649 bd = bdget_disk(d->gd, 0);
650 650
651 if (bd) { 651 if (bd) {
@@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
707 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) 707 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
708 return; 708 return;
709 if (d->gd != NULL) { 709 if (d->gd != NULL) {
710 d->gd->capacity = ssize; 710 set_capacity(d->gd, ssize);
711 d->flags |= DEVFL_NEWSIZE; 711 d->flags |= DEVFL_NEWSIZE;
712 } else 712 } else
713 d->flags |= DEVFL_GDALLOC; 713 d->flags |= DEVFL_GDALLOC;
@@ -756,12 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
756 unsigned long n_sect = bio->bi_size >> 9; 756 unsigned long n_sect = bio->bi_size >> 9;
757 const int rw = bio_data_dir(bio); 757 const int rw = bio_data_dir(bio);
758 struct hd_struct *part; 758 struct hd_struct *part;
759 int cpu;
759 760
760 part = get_part(disk, sector); 761 cpu = part_stat_lock();
761 all_stat_inc(disk, part, ios[rw], sector); 762 part = disk_map_sector_rcu(disk, sector);
762 all_stat_add(disk, part, ticks[rw], duration, sector); 763
763 all_stat_add(disk, part, sectors[rw], n_sect, sector); 764 part_stat_inc(cpu, part, ios[rw]);
764 all_stat_add(disk, part, io_ticks, duration, sector); 765 part_stat_add(cpu, part, ticks[rw], duration);
766 part_stat_add(cpu, part, sectors[rw], n_sect);
767 part_stat_add(cpu, part, io_ticks, duration);
768
769 part_stat_unlock();
765} 770}
766 771
767void 772void
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index a1d813ab0d6b..6a8038d115b5 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d)
91 } 91 }
92 92
93 if (d->gd) 93 if (d->gd)
94 d->gd->capacity = 0; 94 set_capacity(d->gd, 0);
95 95
96 d->flags &= ~DEVFL_UP; 96 d->flags &= ~DEVFL_UP;
97} 97}
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b73116ef9236..1e1f9153000c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
3460 hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); 3460 hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
3461 3461
3462 hba[i]->cmd_pool_bits = 3462 hba[i]->cmd_pool_bits =
3463 kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - 3463 kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3464 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); 3464 * sizeof(unsigned long), GFP_KERNEL);
3465 hba[i]->cmd_pool = (CommandList_struct *) 3465 hba[i]->cmd_pool = (CommandList_struct *)
3466 pci_alloc_consistent(hba[i]->pdev, 3466 pci_alloc_consistent(hba[i]->pdev,
3467 hba[i]->nr_cmds * sizeof(CommandList_struct), 3467 hba[i]->nr_cmds * sizeof(CommandList_struct),
@@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
3493 /* command and error info recs zeroed out before 3493 /* command and error info recs zeroed out before
3494 they are used */ 3494 they are used */
3495 memset(hba[i]->cmd_pool_bits, 0, 3495 memset(hba[i]->cmd_pool_bits, 0,
3496 ((hba[i]->nr_cmds + BITS_PER_LONG - 3496 DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3497 1) / BITS_PER_LONG) * sizeof(unsigned long)); 3497 * sizeof(unsigned long));
3498 3498
3499 hba[i]->num_luns = 0; 3499 hba[i]->num_luns = 0;
3500 hba[i]->highest_lun = -1; 3500 hba[i]->highest_lun = -1;
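The cciss conversions above are behavior-preserving: DIV_ROUND_UP(n, d) from <linux/kernel.h> expands to ((n) + (d) - 1) / (d), exactly the round-up being open-coded before. A small sketch with an illustrative command count:

#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/types.h>

static size_t cmd_pool_bitmap_bytes(int nr_cmds)
{
	/* e.g. nr_cmds = 100, BITS_PER_LONG = 64:
	 * DIV_ROUND_UP(100, 64) == (100 + 63) / 64 == 2 longs,
	 * matching the old open-coded (100 + 64 - 1) / 64 */
	return DIV_ROUND_UP(nr_cmds, BITS_PER_LONG) * sizeof(unsigned long);
}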
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index e1233aabda77..a3fd87b41444 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -365,7 +365,7 @@ struct scsi2map {
365 365
366static int 366static int
367cciss_scsi_add_entry(int ctlr, int hostno, 367cciss_scsi_add_entry(int ctlr, int hostno,
368 unsigned char *scsi3addr, int devtype, 368 struct cciss_scsi_dev_t *device,
369 struct scsi2map *added, int *nadded) 369 struct scsi2map *added, int *nadded)
370{ 370{
371 /* assumes hba[ctlr]->scsi_ctlr->lock is held */ 371 /* assumes hba[ctlr]->scsi_ctlr->lock is held */
@@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno,
384 lun = 0; 384 lun = 0;
385 /* Is this device a non-zero lun of a multi-lun device */ 385 /* Is this device a non-zero lun of a multi-lun device */
386 /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ 386 /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */
387 if (scsi3addr[4] != 0) { 387 if (device->scsi3addr[4] != 0) {
388 /* Search through our list and find the device which */ 388 /* Search through our list and find the device which */
389 /* has the same 8 byte LUN address, excepting byte 4. */ 389 /* has the same 8 byte LUN address, excepting byte 4. */
390 /* Assign the same bus and target for this new LUN. */ 390 /* Assign the same bus and target for this new LUN. */
391 /* Use the logical unit number from the firmware. */ 391 /* Use the logical unit number from the firmware. */
392 memcpy(addr1, scsi3addr, 8); 392 memcpy(addr1, device->scsi3addr, 8);
393 addr1[4] = 0; 393 addr1[4] = 0;
394 for (i = 0; i < n; i++) { 394 for (i = 0; i < n; i++) {
395 sd = &ccissscsi[ctlr].dev[i]; 395 sd = &ccissscsi[ctlr].dev[i];
@@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno,
399 if (memcmp(addr1, addr2, 8) == 0) { 399 if (memcmp(addr1, addr2, 8) == 0) {
400 bus = sd->bus; 400 bus = sd->bus;
401 target = sd->target; 401 target = sd->target;
402 lun = scsi3addr[4]; 402 lun = device->scsi3addr[4];
403 break; 403 break;
404 } 404 }
405 } 405 }
@@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno,
420 added[*nadded].lun = sd->lun; 420 added[*nadded].lun = sd->lun;
421 (*nadded)++; 421 (*nadded)++;
422 422
423 memcpy(&sd->scsi3addr[0], scsi3addr, 8); 423 memcpy(sd->scsi3addr, device->scsi3addr, 8);
424 sd->devtype = devtype; 424 memcpy(sd->vendor, device->vendor, sizeof(sd->vendor));
425 memcpy(sd->revision, device->revision, sizeof(sd->revision));
426 memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
427 sd->devtype = device->devtype;
428
425 ccissscsi[ctlr].ndevices++; 429 ccissscsi[ctlr].ndevices++;
426 430
427 /* initially, (before registering with scsi layer) we don't 431 /* initially, (before registering with scsi layer) we don't
@@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr)
487 CPQ_TAPE_UNLOCK(ctlr, flags); 491 CPQ_TAPE_UNLOCK(ctlr, flags);
488} 492}
489 493
494static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
495 struct cciss_scsi_dev_t *dev2)
496{
497 return dev1->devtype == dev2->devtype &&
498 memcmp(dev1->scsi3addr, dev2->scsi3addr,
499 sizeof(dev1->scsi3addr)) == 0 &&
500 memcmp(dev1->device_id, dev2->device_id,
501 sizeof(dev1->device_id)) == 0 &&
502 memcmp(dev1->vendor, dev2->vendor,
503 sizeof(dev1->vendor)) == 0 &&
504 memcmp(dev1->model, dev2->model,
505 sizeof(dev1->model)) == 0 &&
506 memcmp(dev1->revision, dev2->revision,
507 sizeof(dev1->revision)) == 0;
508}
509
490static int 510static int
491adjust_cciss_scsi_table(int ctlr, int hostno, 511adjust_cciss_scsi_table(int ctlr, int hostno,
492 struct cciss_scsi_dev_t sd[], int nsds) 512 struct cciss_scsi_dev_t sd[], int nsds)
@@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
532 for (j=0;j<nsds;j++) { 552 for (j=0;j<nsds;j++) {
533 if (SCSI3ADDR_EQ(sd[j].scsi3addr, 553 if (SCSI3ADDR_EQ(sd[j].scsi3addr,
534 csd->scsi3addr)) { 554 csd->scsi3addr)) {
535 if (sd[j].devtype == csd->devtype) 555 if (device_is_the_same(&sd[j], csd))
536 found=2; 556 found=2;
537 else 557 else
538 found=1; 558 found=1;
@@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
548 cciss_scsi_remove_entry(ctlr, hostno, i, 568 cciss_scsi_remove_entry(ctlr, hostno, i,
549 removed, &nremoved); 569 removed, &nremoved);
550 /* remove ^^^, hence i not incremented */ 570 /* remove ^^^, hence i not incremented */
551 } 571 } else if (found == 1) { /* device is different in some way */
552 else if (found == 1) { /* device is different kind */
553 changes++; 572 changes++;
554 printk("cciss%d: device c%db%dt%dl%d type changed " 573 printk("cciss%d: device c%db%dt%dl%d has changed.\n",
555 "(device type now %s).\n", 574 ctlr, hostno, csd->bus, csd->target, csd->lun);
556 ctlr, hostno, csd->bus, csd->target, csd->lun,
557 scsi_device_type(csd->devtype));
558 cciss_scsi_remove_entry(ctlr, hostno, i, 575 cciss_scsi_remove_entry(ctlr, hostno, i,
559 removed, &nremoved); 576 removed, &nremoved);
560 /* remove ^^^, hence i not incremented */ 577 /* remove ^^^, hence i not incremented */
561 if (cciss_scsi_add_entry(ctlr, hostno, 578 if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
562 &sd[j].scsi3addr[0], sd[j].devtype,
563 added, &nadded) != 0) 579 added, &nadded) != 0)
564 /* we just removed one, so add can't fail. */ 580 /* we just removed one, so add can't fail. */
565 BUG(); 581 BUG();
566 csd->devtype = sd[j].devtype; 582 csd->devtype = sd[j].devtype;
583 memcpy(csd->device_id, sd[j].device_id,
584 sizeof(csd->device_id));
585 memcpy(csd->vendor, sd[j].vendor,
586 sizeof(csd->vendor));
587 memcpy(csd->model, sd[j].model,
588 sizeof(csd->model));
589 memcpy(csd->revision, sd[j].revision,
590 sizeof(csd->revision));
567 } else /* device is same as it ever was, */ 591 } else /* device is same as it ever was, */
568 i++; /* so just move along. */ 592 i++; /* so just move along. */
569 } 593 }
@@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
577 csd = &ccissscsi[ctlr].dev[j]; 601 csd = &ccissscsi[ctlr].dev[j];
578 if (SCSI3ADDR_EQ(sd[i].scsi3addr, 602 if (SCSI3ADDR_EQ(sd[i].scsi3addr,
579 csd->scsi3addr)) { 603 csd->scsi3addr)) {
580 if (sd[i].devtype == csd->devtype) 604 if (device_is_the_same(&sd[i], csd))
581 found=2; /* found device */ 605 found=2; /* found device */
582 else 606 else
583 found=1; /* found a bug. */ 607 found=1; /* found a bug. */
@@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno,
586 } 610 }
587 if (!found) { 611 if (!found) {
588 changes++; 612 changes++;
589 if (cciss_scsi_add_entry(ctlr, hostno, 613 if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
590
591 &sd[i].scsi3addr[0], sd[i].devtype,
592 added, &nadded) != 0) 614 added, &nadded) != 0)
593 break; 615 break;
594 } else if (found == 1) { 616 } else if (found == 1) {
595 /* should never happen... */ 617 /* should never happen... */
596 changes++; 618 changes++;
597 printk("cciss%d: device unexpectedly changed type\n", 619 printk(KERN_WARNING "cciss%d: device "
598 ctlr); 620 "unexpectedly changed\n", ctlr);
599 /* but if it does happen, we just ignore that device */ 621 /* but if it does happen, we just ignore that device */
600 } 622 }
601 } 623 }
@@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp)
1012 1034
1013static int 1035static int
1014cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 1036cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
1015 unsigned char *buf, unsigned char bufsize) 1037 unsigned char page, unsigned char *buf,
1038 unsigned char bufsize)
1016{ 1039{
1017 int rc; 1040 int rc;
1018 CommandList_struct *cp; 1041 CommandList_struct *cp;
@@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
1032 ei = cp->err_info; 1055 ei = cp->err_info;
1033 1056
1034 cdb[0] = CISS_INQUIRY; 1057 cdb[0] = CISS_INQUIRY;
1035 cdb[1] = 0; 1058 cdb[1] = (page != 0);
1036 cdb[2] = 0; 1059 cdb[2] = page;
1037 cdb[3] = 0; 1060 cdb[3] = 0;
1038 cdb[4] = bufsize; 1061 cdb[4] = bufsize;
1039 cdb[5] = 0; 1062 cdb[5] = 0;
@@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr,
1053 return rc; 1076 return rc;
1054} 1077}
1055 1078
1079/* Get the device id from inquiry page 0x83 */
1080static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
1081 unsigned char *device_id, int buflen)
1082{
1083 int rc;
1084 unsigned char *buf;
1085
1086 if (buflen > 16)
1087 buflen = 16;
1088 buf = kzalloc(64, GFP_KERNEL);
1089 if (!buf)
1090 return -1;
1091 rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
1092 if (rc == 0)
1093 memcpy(device_id, &buf[8], buflen);
1094 kfree(buf);
1095 return rc != 0;
1096}
1097
1056static int 1098static int
1057cciss_scsi_do_report_phys_luns(ctlr_info_t *c, 1099cciss_scsi_do_report_phys_luns(ctlr_info_t *c,
1058 ReportLunData_struct *buf, int bufsize) 1100 ReportLunData_struct *buf, int bufsize)
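cciss_scsi_get_device_id() copies from buf[8] because SCSI VPD page 0x83 carries a 4-byte page header followed by designation descriptors that each have a 4-byte header of their own, putting the first identifier at byte 8. A hedged layout sketch; the field names are descriptive assumptions, not kernel structures:

struct vpd83_layout {			/* illustrative only */
	unsigned char periph;		/* byte 0: peripheral qualifier/type */
	unsigned char page_code;	/* byte 1: 0x83 */
	unsigned char len[2];		/* bytes 2-3: page length */
	unsigned char proto_code_set;	/* byte 4: first descriptor header */
	unsigned char assoc_type;	/* byte 5 */
	unsigned char reserved;		/* byte 6 */
	unsigned char id_len;		/* byte 7 */
	unsigned char id[16];		/* byte 8 on: what the helper copies */
};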
@@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
1142 ctlr_info_t *c; 1184 ctlr_info_t *c;
1143 __u32 num_luns=0; 1185 __u32 num_luns=0;
1144 unsigned char *ch; 1186 unsigned char *ch;
1145 /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ 1187 struct cciss_scsi_dev_t *currentsd, *this_device;
1146 struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
1147 int ncurrent=0; 1188 int ncurrent=0;
1148 int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; 1189 int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
1149 int i; 1190 int i;
1150 1191
1151 c = (ctlr_info_t *) hba[cntl_num]; 1192 c = (ctlr_info_t *) hba[cntl_num];
1152 ld_buff = kzalloc(reportlunsize, GFP_KERNEL); 1193 ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
1153 if (ld_buff == NULL) {
1154 printk(KERN_ERR "cciss: out of memory\n");
1155 return;
1156 }
1157 inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); 1194 inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
1158 if (inq_buff == NULL) { 1195 currentsd = kzalloc(sizeof(*currentsd) *
1159 printk(KERN_ERR "cciss: out of memory\n"); 1196 (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL);
1160 kfree(ld_buff); 1197 if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) {
1161 return; 1198 printk(KERN_ERR "cciss: out of memory\n");
1199 goto out;
1162 } 1200 }
1163 1201 this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
1164 if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { 1202 if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
1165 ch = &ld_buff->LUNListLength[0]; 1203 ch = &ld_buff->LUNListLength[0];
1166 num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; 1204 num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
@@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
1179 1217
1180 1218
1181 /* adjust our table of devices */ 1219 /* adjust our table of devices */
1182 for(i=0; i<num_luns; i++) 1220 for (i = 0; i < num_luns; i++) {
1183 {
1184 int devtype;
1185
1186 /* for each physical lun, do an inquiry */ 1221 /* for each physical lun, do an inquiry */
1187 if (ld_buff->LUN[i][3] & 0xC0) continue; 1222 if (ld_buff->LUN[i][3] & 0xC0) continue;
1188 memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); 1223 memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
1189 memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); 1224 memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
1190 1225
1191 if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, 1226 if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
1192 (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { 1227 (unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
1193 /* Inquiry failed (msg printed already) */ 1228 /* Inquiry failed (msg printed already) */
1194 devtype = 0; /* so we will skip this device. */ 1229 continue; /* so we will skip this device. */
1195 } else /* what kind of device is this? */ 1230
1196 devtype = (inq_buff[0] & 0x1f); 1231 this_device->devtype = (inq_buff[0] & 0x1f);
1197 1232 this_device->bus = -1;
1198 switch (devtype) 1233 this_device->target = -1;
1234 this_device->lun = -1;
1235 memcpy(this_device->scsi3addr, scsi3addr, 8);
1236 memcpy(this_device->vendor, &inq_buff[8],
1237 sizeof(this_device->vendor));
1238 memcpy(this_device->model, &inq_buff[16],
1239 sizeof(this_device->model));
1240 memcpy(this_device->revision, &inq_buff[32],
1241 sizeof(this_device->revision));
1242 memset(this_device->device_id, 0,
1243 sizeof(this_device->device_id));
1244 cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
1245 this_device->device_id, sizeof(this_device->device_id));
1246
1247 switch (this_device->devtype)
1199 { 1248 {
1200 case 0x05: /* CD-ROM */ { 1249 case 0x05: /* CD-ROM */ {
1201 1250
@@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
1220 if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { 1269 if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
1221 printk(KERN_INFO "cciss%d: %s ignored, " 1270 printk(KERN_INFO "cciss%d: %s ignored, "
1222 "too many devices.\n", cntl_num, 1271 "too many devices.\n", cntl_num,
1223 scsi_device_type(devtype)); 1272 scsi_device_type(this_device->devtype));
1224 break; 1273 break;
1225 } 1274 }
1226 memcpy(&currentsd[ncurrent].scsi3addr[0], 1275 currentsd[ncurrent] = *this_device;
1227 &scsi3addr[0], 8);
1228 currentsd[ncurrent].devtype = devtype;
1229 currentsd[ncurrent].bus = -1;
1230 currentsd[ncurrent].target = -1;
1231 currentsd[ncurrent].lun = -1;
1232 ncurrent++; 1276 ncurrent++;
1233 break; 1277 break;
1234 default: 1278 default:
@@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno)
1240out: 1284out:
1241 kfree(inq_buff); 1285 kfree(inq_buff);
1242 kfree(ld_buff); 1286 kfree(ld_buff);
1287 kfree(currentsd);
1243 return; 1288 return;
1244} 1289}
1245 1290
diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h
index d9c2c586502f..7b750245ae76 100644
--- a/drivers/block/cciss_scsi.h
+++ b/drivers/block/cciss_scsi.h
@@ -66,6 +66,10 @@ struct cciss_scsi_dev_t {
66 int devtype; 66 int devtype;
67 int bus, target, lun; /* as presented to the OS */ 67 int bus, target, lun; /* as presented to the OS */
68 unsigned char scsi3addr[8]; /* as presented to the HW */ 68 unsigned char scsi3addr[8]; /* as presented to the HW */
69 unsigned char device_id[16]; /* from inquiry pg. 0x83 */
70 unsigned char vendor[8]; /* bytes 8-15 of inquiry data */
71 unsigned char model[16]; /* bytes 16-31 of inquiry data */
72 unsigned char revision[4]; /* bytes 32-35 of inquiry data */
69}; 73};
70 74
71struct cciss_scsi_hba_t { 75struct cciss_scsi_hba_t {
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 09c14341e6e3..3d967525e9a9 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
424 hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), 424 hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
425 &(hba[i]->cmd_pool_dhandle)); 425 &(hba[i]->cmd_pool_dhandle));
426 hba[i]->cmd_pool_bits = kcalloc( 426 hba[i]->cmd_pool_bits = kcalloc(
427 (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), 427 DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long),
428 GFP_KERNEL); 428 GFP_KERNEL);
429 429
430 if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) 430 if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 395f8ea7981c..cf64ddf5d839 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -423,8 +423,15 @@ static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
423 * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical 423 * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical
424 * side 0 is on physical side 0 (but with the misnamed sector IDs). 424 * side 0 is on physical side 0 (but with the misnamed sector IDs).
425 * 'stretch' should probably be renamed to something more general, like 425 * 'stretch' should probably be renamed to something more general, like
426 * 'options'. Other parameters should be self-explanatory (see also 426 * 'options'.
427 * setfdprm(8)). 427 *
428 * Bits 2 through 9 of 'stretch' tell the number of the first sector.
429 * The LSB (bit 2) is flipped. For most disks, the first sector
430 * is 1 (represented by 0x00<<2). For some CP/M and music sampler
431 * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2).
432 * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2).
433 *
434 * Other parameters should be self-explanatory (see also setfdprm(8)).
428 */ 435 */
429/* 436/*
430 Size 437 Size
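The FD_SECTBASE()/FD_SECTBASEMASK helpers used in the hunks below implement the encoding that comment describes. A hedged sketch assumed from the description; the authoritative macros live in <linux/fd.h>:

/* sketch only; see <linux/fd.h> for the real definitions */
#define FD_SECTBASEMASK		0x3FC	/* bits 2-9 of 'stretch' */
#define FD_SECTBASE(floppy) \
	((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1)

/* stored 0x00 decodes to first sector 1, 0x01 to 0, 0xC0 to 0xC1 */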
@@ -1355,20 +1362,20 @@ static void fdc_specify(void)
1355 } 1362 }
1356 1363
1357 /* Convert step rate from microseconds to milliseconds and 4 bits */ 1364 /* Convert step rate from microseconds to milliseconds and 4 bits */
1358 srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1365 srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR);
1359 if (slow_floppy) { 1366 if (slow_floppy) {
1360 srt = srt / 4; 1367 srt = srt / 4;
1361 } 1368 }
1362 SUPBOUND(srt, 0xf); 1369 SUPBOUND(srt, 0xf);
1363 INFBOUND(srt, 0); 1370 INFBOUND(srt, 0);
1364 1371
1365 hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1372 hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR);
1366 if (hlt < 0x01) 1373 if (hlt < 0x01)
1367 hlt = 0x01; 1374 hlt = 0x01;
1368 else if (hlt > 0x7f) 1375 else if (hlt > 0x7f)
1369 hlt = hlt_max_code; 1376 hlt = hlt_max_code;
1370 1377
1371 hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; 1378 hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR);
1372 if (hut < 0x1) 1379 if (hut < 0x1)
1373 hut = 0x1; 1380 hut = 0x1;
1374 else if (hut > 0xf) 1381 else if (hut > 0xf)
@@ -2236,9 +2243,9 @@ static void setup_format_params(int track)
2236 } 2243 }
2237 } 2244 }
2238 } 2245 }
2239 if (_floppy->stretch & FD_ZEROBASED) { 2246 if (_floppy->stretch & FD_SECTBASEMASK) {
2240 for (count = 0; count < F_SECT_PER_TRACK; count++) 2247 for (count = 0; count < F_SECT_PER_TRACK; count++)
2241 here[count].sect--; 2248 here[count].sect += FD_SECTBASE(_floppy) - 1;
2242 } 2249 }
2243} 2250}
2244 2251
@@ -2385,7 +2392,7 @@ static void rw_interrupt(void)
2385 2392
2386#ifdef FLOPPY_SANITY_CHECK 2393#ifdef FLOPPY_SANITY_CHECK
2387 if (nr_sectors / ssize > 2394 if (nr_sectors / ssize >
2388 (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { 2395 DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) {
2389 DPRINT("long rw: %x instead of %lx\n", 2396 DPRINT("long rw: %x instead of %lx\n",
2390 nr_sectors, current_count_sectors); 2397 nr_sectors, current_count_sectors);
2391 printk("rs=%d s=%d\n", R_SECTOR, SECTOR); 2398 printk("rs=%d s=%d\n", R_SECTOR, SECTOR);
@@ -2649,7 +2656,7 @@ static int make_raw_rw_request(void)
2649 } 2656 }
2650 HEAD = fsector_t / _floppy->sect; 2657 HEAD = fsector_t / _floppy->sect;
2651 2658
2652 if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || 2659 if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) ||
2653 TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) 2660 TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect)
2654 max_sector = _floppy->sect; 2661 max_sector = _floppy->sect;
2655 2662
@@ -2679,7 +2686,7 @@ static int make_raw_rw_request(void)
2679 CODE2SIZE; 2686 CODE2SIZE;
2680 SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; 2687 SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE;
2681 SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + 2688 SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) +
2682 ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); 2689 FD_SECTBASE(_floppy);
2683 2690
2684 /* tracksize describes the size which can be filled up with sectors 2691 /* tracksize describes the size which can be filled up with sectors
2685 * of size ssize. 2692 * of size ssize.
@@ -3311,7 +3318,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
3311 g->head <= 0 || 3318 g->head <= 0 ||
3312 g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || 3319 g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) ||
3313 /* check if reserved bits are set */ 3320 /* check if reserved bits are set */
3314 (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) 3321 (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0)
3315 return -EINVAL; 3322 return -EINVAL;
3316 if (type) { 3323 if (type) {
3317 if (!capable(CAP_SYS_ADMIN)) 3324 if (!capable(CAP_SYS_ADMIN))
@@ -3356,7 +3363,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
3356 if (DRS->maxblock > user_params[drive].sect || 3363 if (DRS->maxblock > user_params[drive].sect ||
3357 DRS->maxtrack || 3364 DRS->maxtrack ||
3358 ((user_params[drive].sect ^ oldStretch) & 3365 ((user_params[drive].sect ^ oldStretch) &
3359 (FD_SWAPSIDES | FD_ZEROBASED))) 3366 (FD_SWAPSIDES | FD_SECTBASEMASK)))
3360 invalidate_drive(bdev); 3367 invalidate_drive(bdev);
3361 else 3368 else
3362 process_fd_request(); 3369 process_fd_request();
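
The sector-base encoding described in the floppy_struct comment above is easy
to get wrong because of the inverted bit, so here is a minimal userspace
sketch of the decode/encode pair, assuming the bits-2..9 mask the patch calls
FD_SECTBASEMASK (set_sectbase() is a hypothetical inverse added purely for
illustration):

    #include <stdio.h>

    #define FD_SECTBASEMASK 0x3FC   /* bits 2..9 of 'stretch' */

    /* decode: bit 2 is stored inverted, so stretch == 0 still means
     * "first sector is 1" */
    static unsigned int sectbase(unsigned int stretch)
    {
            return ((stretch & FD_SECTBASEMASK) >> 2) ^ 1;
    }

    /* hypothetical inverse, for illustration only */
    static unsigned int set_sectbase(unsigned int stretch, unsigned int base)
    {
            return (stretch & ~FD_SECTBASEMASK) |
                   (((base ^ 1) << 2) & FD_SECTBASEMASK);
    }

    int main(void)
    {
            unsigned int bases[] = { 1, 0, 0xC1 }; /* default, CP/M, Amstrad CPC */

            for (int i = 0; i < 3; i++) {
                    unsigned int s = set_sectbase(0, bases[i]);
                    printf("base %#x -> stretch %#x -> %#x\n",
                           bases[i], s, sectbase(s));
            }
            return 0;
    }
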
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 1778e4a2c672..7b3351260d56 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo)
403 BUG_ON(lo->magic != LO_MAGIC); 403 BUG_ON(lo->magic != LO_MAGIC);
404 404
405 lo->pid = current->pid; 405 lo->pid = current->pid;
406 ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); 406 ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
407 if (ret) { 407 if (ret) {
408 printk(KERN_ERR "nbd: sysfs_create_file failed!\n"); 408 printk(KERN_ERR "nbd: sysfs_create_file failed!\n");
409 return ret; 409 return ret;
@@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo)
412 while ((req = nbd_read_stat(lo)) != NULL) 412 while ((req = nbd_read_stat(lo)) != NULL)
413 nbd_end_request(req); 413 nbd_end_request(req);
414 414
415 sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); 415 sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr);
416 return 0; 416 return 0;
417} 417}
418 418
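
The nbd hunk above is the first of many in this merge (random.c, dm, md, mmc,
memstick and s390 below) that stop reaching into struct gendisk for
->dev.kobj, ->major and ->first_minor and go through accessors instead, so
the extended-dev_t work can change the underlying representation. What the
genhd.h helpers amount to in this series, sketched from the call sites (an
approximation, not the exact macro bodies):

    /* the whole-disk "partition 0" embeds the struct device */
    static inline struct device *disk_to_dev(struct gendisk *disk)
    {
            return &disk->part0.__dev;
    }

    /* replaces open-coded MKDEV(disk->major, disk->first_minor) */
    static inline dev_t disk_devt(struct gendisk *disk)
    {
            return disk_to_dev(disk)->devt;
    }
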
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 29b7a648cc6e..0e077150568b 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio)
2544 if (last_zone != zone) { 2544 if (last_zone != zone) {
2545 BUG_ON(last_zone != zone + pd->settings.size); 2545 BUG_ON(last_zone != zone + pd->settings.size);
2546 first_sectors = last_zone - bio->bi_sector; 2546 first_sectors = last_zone - bio->bi_sector;
2547 bp = bio_split(bio, bio_split_pool, first_sectors); 2547 bp = bio_split(bio, first_sectors);
2548 BUG_ON(!bp); 2548 BUG_ON(!bp);
2549 pkt_make_request(q, &bp->bio1); 2549 pkt_make_request(q, &bp->bio1);
2550 pkt_make_request(q, &bp->bio2); 2550 pkt_make_request(q, &bp->bio2);
@@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
2911 if (!disk->queue) 2911 if (!disk->queue)
2912 goto out_mem2; 2912 goto out_mem2;
2913 2913
2914 pd->pkt_dev = MKDEV(disk->major, disk->first_minor); 2914 pd->pkt_dev = MKDEV(pktdev_major, idx);
2915 ret = pkt_new_dev(pd, dev); 2915 ret = pkt_new_dev(pd, dev);
2916 if (ret) 2916 if (ret)
2917 goto out_new_dev; 2917 goto out_new_dev;
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d797e209951d..936466f62afd 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
199 if (blk_fs_request(req)) { 199 if (blk_fs_request(req)) {
200 if (ps3disk_submit_request_sg(dev, req)) 200 if (ps3disk_submit_request_sg(dev, req))
201 break; 201 break;
202 } else if (req->cmd_type == REQ_TYPE_FLUSH) { 202 } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
203 req->cmd[0] == REQ_LB_OP_FLUSH) {
203 if (ps3disk_submit_flush_request(dev, req)) 204 if (ps3disk_submit_flush_request(dev, req))
204 break; 205 break;
205 } else { 206 } else {
@@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
257 return IRQ_HANDLED; 258 return IRQ_HANDLED;
258 } 259 }
259 260
260 if (req->cmd_type == REQ_TYPE_FLUSH) { 261 if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
262 req->cmd[0] == REQ_LB_OP_FLUSH) {
261 read = 0; 263 read = 0;
262 num_sectors = req->hard_cur_sectors; 264 num_sectors = req->hard_cur_sectors;
263 op = "flush"; 265 op = "flush";
@@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req)
405 407
406 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); 408 dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
407 409
408 req->cmd_type = REQ_TYPE_FLUSH; 410 req->cmd_type = REQ_TYPE_LINUX_BLOCK;
411 req->cmd[0] = REQ_LB_OP_FLUSH;
409} 412}
410 413
411static unsigned long ps3disk_mask; 414static unsigned long ps3disk_mask;
@@ -538,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev)
538 struct ps3disk_private *priv = dev->sbd.core.driver_data; 541 struct ps3disk_private *priv = dev->sbd.core.driver_data;
539 542
540 mutex_lock(&ps3disk_mask_mutex); 543 mutex_lock(&ps3disk_mask_mutex);
541 __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, 544 __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS,
542 &ps3disk_mask); 545 &ps3disk_mask);
543 mutex_unlock(&ps3disk_mask_mutex); 546 mutex_unlock(&ps3disk_mask_mutex);
544 del_gendisk(priv->gendisk); 547 del_gendisk(priv->gendisk);
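
With the dedicated flush cmd_type gone, a barrier flush now travels as a
generic REQ_TYPE_LINUX_BLOCK request whose first cmd byte names the
operation, which is why the ps3disk hunks repeat the same two-part test in
both the submit and the interrupt path. A small helper along these lines
would factor it out (hypothetical, not part of the patch):

    static inline int req_is_lb_flush(const struct request *req)
    {
            /* generic Linux-block request carrying a flush opcode */
            return req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
                   req->cmd[0] == REQ_LB_OP_FLUSH;
    }
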
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 42251095134f..6ec5fc052786 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -47,20 +47,20 @@ static void blk_done(struct virtqueue *vq)
47 47
48 spin_lock_irqsave(&vblk->lock, flags); 48 spin_lock_irqsave(&vblk->lock, flags);
49 while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { 49 while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
50 int uptodate; 50 int error;
51 switch (vbr->status) { 51 switch (vbr->status) {
52 case VIRTIO_BLK_S_OK: 52 case VIRTIO_BLK_S_OK:
53 uptodate = 1; 53 error = 0;
54 break; 54 break;
55 case VIRTIO_BLK_S_UNSUPP: 55 case VIRTIO_BLK_S_UNSUPP:
56 uptodate = -ENOTTY; 56 error = -ENOTTY;
57 break; 57 break;
58 default: 58 default:
59 uptodate = 0; 59 error = -EIO;
60 break; 60 break;
61 } 61 }
62 62
63 end_dequeued_request(vbr->req, uptodate); 63 __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req));
64 list_del(&vbr->list); 64 list_del(&vbr->list);
65 mempool_free(vbr, vblk->pool); 65 mempool_free(vbr, vblk->pool);
66 } 66 }
@@ -84,11 +84,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
84 if (blk_fs_request(vbr->req)) { 84 if (blk_fs_request(vbr->req)) {
85 vbr->out_hdr.type = 0; 85 vbr->out_hdr.type = 0;
86 vbr->out_hdr.sector = vbr->req->sector; 86 vbr->out_hdr.sector = vbr->req->sector;
87 vbr->out_hdr.ioprio = vbr->req->ioprio; 87 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
88 } else if (blk_pc_request(vbr->req)) { 88 } else if (blk_pc_request(vbr->req)) {
89 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; 89 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
90 vbr->out_hdr.sector = 0; 90 vbr->out_hdr.sector = 0;
91 vbr->out_hdr.ioprio = vbr->req->ioprio; 91 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
92 } else { 92 } else {
93 /* We don't put anything else in the queue. */ 93 /* We don't put anything else in the queue. */
94 BUG(); 94 BUG();
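
The virtio_blk conversion replaces the old uptodate convention (1, 0, or a
negative value) with a real errno, which __blk_end_request() expects. The
mapping reduces to a three-way switch; factored out it would look like this
(helper name hypothetical):

    static int virtblk_result(unsigned char status)
    {
            switch (status) {
            case VIRTIO_BLK_S_OK:
                    return 0;        /* completed successfully */
            case VIRTIO_BLK_S_UNSUPP:
                    return -ENOTTY;  /* host does not support the command */
            default:
                    return -EIO;     /* any other failure */
            }
    }
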
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3ca643cafccd..bff602ccccf3 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -105,15 +105,17 @@ static DEFINE_SPINLOCK(blkif_io_lock);
105#define GRANT_INVALID_REF 0 105#define GRANT_INVALID_REF 0
106 106
107#define PARTS_PER_DISK 16 107#define PARTS_PER_DISK 16
108#define PARTS_PER_EXT_DISK 256
108 109
109#define BLKIF_MAJOR(dev) ((dev)>>8) 110#define BLKIF_MAJOR(dev) ((dev)>>8)
110#define BLKIF_MINOR(dev) ((dev) & 0xff) 111#define BLKIF_MINOR(dev) ((dev) & 0xff)
111 112
112#define DEV_NAME "xvd" /* name in /dev */ 113#define EXT_SHIFT 28
114#define EXTENDED (1<<EXT_SHIFT)
115#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
116#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
113 117
114/* Information about our VBDs. */ 118#define DEV_NAME "xvd" /* name in /dev */
115#define MAX_VBDS 64
116static LIST_HEAD(vbds_list);
117 119
118static int get_id_from_freelist(struct blkfront_info *info) 120static int get_id_from_freelist(struct blkfront_info *info)
119{ 121{
@@ -386,31 +388,60 @@ static int xlvbd_barrier(struct blkfront_info *info)
386} 388}
387 389
388 390
389static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, 391static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
390 int vdevice, u16 vdisk_info, u16 sector_size, 392 struct blkfront_info *info,
391 struct blkfront_info *info) 393 u16 vdisk_info, u16 sector_size)
392{ 394{
393 struct gendisk *gd; 395 struct gendisk *gd;
394 int nr_minors = 1; 396 int nr_minors = 1;
395 int err = -ENODEV; 397 int err = -ENODEV;
398 unsigned int offset;
399 int minor;
400 int nr_parts;
396 401
397 BUG_ON(info->gd != NULL); 402 BUG_ON(info->gd != NULL);
398 BUG_ON(info->rq != NULL); 403 BUG_ON(info->rq != NULL);
399 404
400 if ((minor % PARTS_PER_DISK) == 0) 405 if ((info->vdevice>>EXT_SHIFT) > 1) {
401 nr_minors = PARTS_PER_DISK; 406 /* this is above the extended range; something is wrong */
407 printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
408 return -ENODEV;
409 }
410
411 if (!VDEV_IS_EXTENDED(info->vdevice)) {
412 minor = BLKIF_MINOR(info->vdevice);
413 nr_parts = PARTS_PER_DISK;
414 } else {
415 minor = BLKIF_MINOR_EXT(info->vdevice);
416 nr_parts = PARTS_PER_EXT_DISK;
417 }
418
419 if ((minor % nr_parts) == 0)
420 nr_minors = nr_parts;
402 421
403 gd = alloc_disk(nr_minors); 422 gd = alloc_disk(nr_minors);
404 if (gd == NULL) 423 if (gd == NULL)
405 goto out; 424 goto out;
406 425
407 if (nr_minors > 1) 426 offset = minor / nr_parts;
408 sprintf(gd->disk_name, "%s%c", DEV_NAME, 427
409 'a' + minor / PARTS_PER_DISK); 428 if (nr_minors > 1) {
410 else 429 if (offset < 26)
411 sprintf(gd->disk_name, "%s%c%d", DEV_NAME, 430 sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset);
412 'a' + minor / PARTS_PER_DISK, 431 else
413 minor % PARTS_PER_DISK); 432 sprintf(gd->disk_name, "%s%c%c", DEV_NAME,
433 'a' + ((offset / 26)-1), 'a' + (offset % 26));
434 } else {
435 if (offset < 26)
436 sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
437 'a' + offset,
438 minor & (nr_parts - 1));
439 else
440 sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME,
441 'a' + ((offset / 26) - 1),
442 'a' + (offset % 26),
443 minor & (nr_parts - 1));
444 }
414 445
415 gd->major = XENVBD_MAJOR; 446 gd->major = XENVBD_MAJOR;
416 gd->first_minor = minor; 447 gd->first_minor = minor;
@@ -699,8 +730,13 @@ static int blkfront_probe(struct xenbus_device *dev,
699 err = xenbus_scanf(XBT_NIL, dev->nodename, 730 err = xenbus_scanf(XBT_NIL, dev->nodename,
700 "virtual-device", "%i", &vdevice); 731 "virtual-device", "%i", &vdevice);
701 if (err != 1) { 732 if (err != 1) {
702 xenbus_dev_fatal(dev, err, "reading virtual-device"); 733 /* go looking in the extended area instead */
703 return err; 734 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
735 "%i", &vdevice);
736 if (err != 1) {
737 xenbus_dev_fatal(dev, err, "reading virtual-device");
738 return err;
739 }
704 } 740 }
705 741
706 info = kzalloc(sizeof(*info), GFP_KERNEL); 742 info = kzalloc(sizeof(*info), GFP_KERNEL);
@@ -861,9 +897,7 @@ static void blkfront_connect(struct blkfront_info *info)
861 if (err) 897 if (err)
862 info->feature_barrier = 0; 898 info->feature_barrier = 0;
863 899
864 err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), 900 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
865 sectors, info->vdevice,
866 binfo, sector_size, info);
867 if (err) { 901 if (err) {
868 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", 902 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
869 info->xbdev->otherend); 903 info->xbdev->otherend);
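
The extended-disk naming above is the usual base-26 scheme: offset =
minor / nr_parts picks the disk letters, one letter for the first 26 disks
and a two-letter suffix after that, so offsets 0, 25, 26, 27 and 701 come out
as xvda, xvdz, xvdaa, xvdab and xvdzz. A standalone sketch of just the
naming logic:

    #include <stdio.h>

    static void xvd_name(char buf[8], unsigned int offset)
    {
            if (offset < 26)
                    sprintf(buf, "xvd%c", 'a' + offset);
            else    /* two letters: 26 -> "aa", 27 -> "ab", ... */
                    sprintf(buf, "xvd%c%c",
                            'a' + (offset / 26) - 1, 'a' + (offset % 26));
    }

    int main(void)
    {
            unsigned int samples[] = { 0, 25, 26, 27, 701 };
            char name[8];

            for (int i = 0; i < 5; i++) {
                    xvd_name(name, samples[i]);
                    printf("offset %3u -> %s\n", samples[i], name);
            }
            return 0;
    }
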
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 74031de517e6..d47f2f80accd 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
2097 2097
2098 len = nr * CD_FRAMESIZE_RAW; 2098 len = nr * CD_FRAMESIZE_RAW;
2099 2099
2100 ret = blk_rq_map_user(q, rq, ubuf, len); 2100 ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
2101 if (ret) 2101 if (ret)
2102 break; 2102 break;
2103 2103
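
blk_rq_map_user() grew two parameters in this series: an optional
struct rq_map_data (NULL when pages should simply be allocated as before) and
an explicit gfp mask, so callers in atomic context can say so. The cdrom
caller above passes the conservative defaults; as a sketch of the call:

    /* map a user buffer into rq, pre-change semantics; may sleep */
    static int map_user_default(struct request_queue *q, struct request *rq,
                                void __user *ubuf, unsigned long len)
    {
            return blk_rq_map_user(q, rq, NULL /* no rq_map_data */,
                                   ubuf, len, GFP_KERNEL);
    }
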
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 1231d95aa695..d6ba77a2dd7b 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -624,14 +624,14 @@ static void gdrom_readdisk_dma(struct work_struct *work)
624 ctrl_outb(1, GDROM_DMA_STATUS_REG); 624 ctrl_outb(1, GDROM_DMA_STATUS_REG);
625 wait_event_interruptible_timeout(request_queue, 625 wait_event_interruptible_timeout(request_queue,
626 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); 626 gd.transfer == 0, GDROM_DEFAULT_TIMEOUT);
627 err = gd.transfer; 627 err = gd.transfer ? -EIO : 0;
628 gd.transfer = 0; 628 gd.transfer = 0;
629 gd.pending = 0; 629 gd.pending = 0;
630 /* now seek to take the request spinlock 630 /* now seek to take the request spinlock
631 * before handling ending the request */ 631 * before handling ending the request */
632 spin_lock(&gdrom_lock); 632 spin_lock(&gdrom_lock);
633 list_del_init(&req->queuelist); 633 list_del_init(&req->queuelist);
634 end_dequeued_request(req, 1 - err); 634 __blk_end_request(req, err, blk_rq_bytes(req));
635 } 635 }
636 spin_unlock(&gdrom_lock); 636 spin_unlock(&gdrom_lock);
637 kfree(read_command); 637 kfree(read_command);
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7ce1ac4baa6d..6af435b89867 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk)
661 if (!disk || !disk->random) 661 if (!disk || !disk->random)
662 return; 662 return;
663 /* first major is 1, so we get >= 0x200 here */ 663 /* first major is 1, so we get >= 0x200 here */
664 DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); 664 DEBUG_ENT("disk event %d:%d\n",
665 MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)));
665 666
666 add_timer_randomness(disk->random, 667 add_timer_randomness(disk->random, 0x100 + disk_devt(disk));
667 0x100 + MKDEV(disk->major, disk->first_minor));
668} 668}
669#endif 669#endif
670 670
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index f16bb4667238..03c2cb6a58bc 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1113,7 +1113,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq)
1113 1113
1114 if (write) { 1114 if (write) {
1115 /* disk has become write protected */ 1115 /* disk has become write protected */
1116 if (cd->disk->policy) { 1116 if (get_disk_ro(cd->disk)) {
1117 cdrom_end_request(drive, 0); 1117 cdrom_end_request(drive, 0);
1118 return ide_stopped; 1118 return ide_stopped;
1119 } 1119 }
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 07ef88bd109b..33ea8c048717 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -41,6 +41,12 @@
41#include <asm/io.h> 41#include <asm/io.h>
42#include <asm/div64.h> 42#include <asm/div64.h>
43 43
44#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
45#define IDE_DISK_MINORS (1 << PARTN_BITS)
46#else
47#define IDE_DISK_MINORS 0
48#endif
49
44struct ide_disk_obj { 50struct ide_disk_obj {
45 ide_drive_t *drive; 51 ide_drive_t *drive;
46 ide_driver_t *driver; 52 ide_driver_t *driver;
@@ -1151,8 +1157,7 @@ static int ide_disk_probe(ide_drive_t *drive)
1151 if (!idkp) 1157 if (!idkp)
1152 goto failed; 1158 goto failed;
1153 1159
1154 g = alloc_disk_node(1 << PARTN_BITS, 1160 g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
1155 hwif_to_node(drive->hwif));
1156 if (!g) 1161 if (!g)
1157 goto out_free_idkp; 1162 goto out_free_idkp;
1158 1163
@@ -1178,9 +1183,11 @@ static int ide_disk_probe(ide_drive_t *drive)
1178 } else 1183 } else
1179 drive->attach = 1; 1184 drive->attach = 1;
1180 1185
1181 g->minors = 1 << PARTN_BITS; 1186 g->minors = IDE_DISK_MINORS;
1182 g->driverfs_dev = &drive->gendev; 1187 g->driverfs_dev = &drive->gendev;
1183 g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; 1188 g->flags |= GENHD_FL_EXT_DEVT;
1189 if (drive->removable)
1190 g->flags |= GENHD_FL_REMOVABLE;
1184 set_capacity(g, idedisk_capacity(drive)); 1191 set_capacity(g, idedisk_capacity(drive));
1185 g->fops = &idedisk_ops; 1192 g->fops = &idedisk_ops;
1186 add_disk(g); 1193 add_disk(g);
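
The IDE_DISK_MINORS dance above is a debugging aid: under
CONFIG_DEBUG_BLOCK_EXT_DEVT the disk is allocated with zero classic minors,
and GENHD_FL_EXT_DEVT then forces every node, partitions included, onto
dynamically allocated extended dev_t numbers so breakage in that path
surfaces immediately. Condensed from the probe hunks (error handling elided):

    static struct gendisk *ide_disk_alloc(ide_drive_t *drive)
    {
            struct gendisk *g;

            g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif));
            if (!g)
                    return NULL;

            g->minors = IDE_DISK_MINORS;   /* 0 under the debug option */
            g->flags |= GENHD_FL_EXT_DEVT; /* partitions get extended dev_t */
            if (drive->removable)
                    g->flags |= GENHD_FL_REMOVABLE;
            return g;
    }
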
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index a51a30e9eab3..70aa86c8807e 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data)
1188{ 1188{
1189 struct gendisk *p = data; 1189 struct gendisk *p = data;
1190 *part &= (1 << PARTN_BITS) - 1; 1190 *part &= (1 << PARTN_BITS) - 1;
1191 return &p->dev.kobj; 1191 return &disk_to_dev(p)->kobj;
1192} 1192}
1193 1193
1194static int exact_lock(dev_t dev, void *data) 1194static int exact_lock(dev_t dev, void *data)
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index b262c0042de3..5b919159f084 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size)
426 old_nl->next = (uint32_t) ((void *) nl - 426 old_nl->next = (uint32_t) ((void *) nl -
427 (void *) old_nl); 427 (void *) old_nl);
428 disk = dm_disk(hc->md); 428 disk = dm_disk(hc->md);
429 nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); 429 nl->dev = huge_encode_dev(disk_devt(disk));
430 nl->next = 0; 430 nl->next = 0;
431 strcpy(nl->name, hc->name); 431 strcpy(nl->name, hc->name);
432 432
@@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
539 if (dm_suspended(md)) 539 if (dm_suspended(md))
540 param->flags |= DM_SUSPEND_FLAG; 540 param->flags |= DM_SUSPEND_FLAG;
541 541
542 param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); 542 param->dev = huge_encode_dev(disk_devt(disk));
543 543
544 /* 544 /*
545 * Yes, this will be out of date by the time it gets back 545 * Yes, this will be out of date by the time it gets back
@@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
548 */ 548 */
549 param->open_count = dm_open_count(md); 549 param->open_count = dm_open_count(md);
550 550
551 if (disk->policy) 551 if (get_disk_ro(disk))
552 param->flags |= DM_READONLY_FLAG; 552 param->flags |= DM_READONLY_FLAG;
553 553
554 param->event_nr = dm_get_event_nr(md); 554 param->event_nr = dm_get_event_nr(md);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c2fcf28b4c70..3d3848132c69 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -33,6 +33,7 @@ struct pgpath {
33 unsigned fail_count; /* Cumulative failure count */ 33 unsigned fail_count; /* Cumulative failure count */
34 34
35 struct dm_path path; 35 struct dm_path path;
36 struct work_struct deactivate_path;
36}; 37};
37 38
38#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) 39#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
@@ -112,6 +113,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
112static void process_queued_ios(struct work_struct *work); 113static void process_queued_ios(struct work_struct *work);
113static void trigger_event(struct work_struct *work); 114static void trigger_event(struct work_struct *work);
114static void activate_path(struct work_struct *work); 115static void activate_path(struct work_struct *work);
116static void deactivate_path(struct work_struct *work);
115 117
116 118
117/*----------------------------------------------- 119/*-----------------------------------------------
@@ -122,8 +124,10 @@ static struct pgpath *alloc_pgpath(void)
122{ 124{
123 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); 125 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
124 126
125 if (pgpath) 127 if (pgpath) {
126 pgpath->path.is_active = 1; 128 pgpath->path.is_active = 1;
129 INIT_WORK(&pgpath->deactivate_path, deactivate_path);
130 }
127 131
128 return pgpath; 132 return pgpath;
129} 133}
@@ -133,6 +137,14 @@ static void free_pgpath(struct pgpath *pgpath)
133 kfree(pgpath); 137 kfree(pgpath);
134} 138}
135 139
140static void deactivate_path(struct work_struct *work)
141{
142 struct pgpath *pgpath =
143 container_of(work, struct pgpath, deactivate_path);
144
145 blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
146}
147
136static struct priority_group *alloc_priority_group(void) 148static struct priority_group *alloc_priority_group(void)
137{ 149{
138 struct priority_group *pg; 150 struct priority_group *pg;
@@ -870,6 +882,7 @@ static int fail_path(struct pgpath *pgpath)
870 pgpath->path.dev->name, m->nr_valid_paths); 882 pgpath->path.dev->name, m->nr_valid_paths);
871 883
872 queue_work(kmultipathd, &m->trigger_event); 884 queue_work(kmultipathd, &m->trigger_event);
885 queue_work(kmultipathd, &pgpath->deactivate_path);
873 886
874out: 887out:
875 spin_unlock_irqrestore(&m->lock, flags); 888 spin_unlock_irqrestore(&m->lock, flags);
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4de90ab3968b..b745d8ac625b 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
284 284
285 memset(major_minor, 0, sizeof(major_minor)); 285 memset(major_minor, 0, sizeof(major_minor));
286 sprintf(major_minor, "%d:%d", 286 sprintf(major_minor, "%d:%d",
287 bio->bi_bdev->bd_disk->major, 287 MAJOR(disk_devt(bio->bi_bdev->bd_disk)),
288 bio->bi_bdev->bd_disk->first_minor); 288 MINOR(disk_devt(bio->bi_bdev->bd_disk)));
289 289
290 /* 290 /*
291 * Test to see which stripe drive triggered the event 291 * Test to see which stripe drive triggered the event
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index ace998ce59f6..327de03a5bdf 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -377,13 +377,14 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
377static void start_io_acct(struct dm_io *io) 377static void start_io_acct(struct dm_io *io)
378{ 378{
379 struct mapped_device *md = io->md; 379 struct mapped_device *md = io->md;
380 int cpu;
380 381
381 io->start_time = jiffies; 382 io->start_time = jiffies;
382 383
383 preempt_disable(); 384 cpu = part_stat_lock();
384 disk_round_stats(dm_disk(md)); 385 part_round_stats(cpu, &dm_disk(md)->part0);
385 preempt_enable(); 386 part_stat_unlock();
386 dm_disk(md)->in_flight = atomic_inc_return(&md->pending); 387 dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
387} 388}
388 389
389static int end_io_acct(struct dm_io *io) 390static int end_io_acct(struct dm_io *io)
@@ -391,15 +392,16 @@ static int end_io_acct(struct dm_io *io)
391 struct mapped_device *md = io->md; 392 struct mapped_device *md = io->md;
392 struct bio *bio = io->bio; 393 struct bio *bio = io->bio;
393 unsigned long duration = jiffies - io->start_time; 394 unsigned long duration = jiffies - io->start_time;
394 int pending; 395 int pending, cpu;
395 int rw = bio_data_dir(bio); 396 int rw = bio_data_dir(bio);
396 397
397 preempt_disable(); 398 cpu = part_stat_lock();
398 disk_round_stats(dm_disk(md)); 399 part_round_stats(cpu, &dm_disk(md)->part0);
399 preempt_enable(); 400 part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
400 dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); 401 part_stat_unlock();
401 402
402 disk_stat_add(dm_disk(md), ticks[rw], duration); 403 dm_disk(md)->part0.in_flight = pending =
404 atomic_dec_return(&md->pending);
403 405
404 return !pending; 406 return !pending;
405} 407}
@@ -885,6 +887,7 @@ static int dm_request(struct request_queue *q, struct bio *bio)
885 int r = -EIO; 887 int r = -EIO;
886 int rw = bio_data_dir(bio); 888 int rw = bio_data_dir(bio);
887 struct mapped_device *md = q->queuedata; 889 struct mapped_device *md = q->queuedata;
890 int cpu;
888 891
889 /* 892 /*
890 * There is no use in forwarding any barrier request since we can't 893 * There is no use in forwarding any barrier request since we can't
@@ -897,8 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio)
897 900
898 down_read(&md->io_lock); 901 down_read(&md->io_lock);
899 902
900 disk_stat_inc(dm_disk(md), ios[rw]); 903 cpu = part_stat_lock();
901 disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); 904 part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]);
905 part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio));
906 part_stat_unlock();
902 907
903 /* 908 /*
904 * If we're suspended we have to queue 909 * If we're suspended we have to queue
@@ -1146,7 +1151,7 @@ static void unlock_fs(struct mapped_device *md);
1146 1151
1147static void free_dev(struct mapped_device *md) 1152static void free_dev(struct mapped_device *md)
1148{ 1153{
1149 int minor = md->disk->first_minor; 1154 int minor = MINOR(disk_devt(md->disk));
1150 1155
1151 if (md->suspended_bdev) { 1156 if (md->suspended_bdev) {
1152 unlock_fs(md); 1157 unlock_fs(md);
@@ -1182,7 +1187,7 @@ static void event_callback(void *context)
1182 list_splice_init(&md->uevent_list, &uevents); 1187 list_splice_init(&md->uevent_list, &uevents);
1183 spin_unlock_irqrestore(&md->uevent_lock, flags); 1188 spin_unlock_irqrestore(&md->uevent_lock, flags);
1184 1189
1185 dm_send_uevents(&uevents, &md->disk->dev.kobj); 1190 dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
1186 1191
1187 atomic_inc(&md->event_nr); 1192 atomic_inc(&md->event_nr);
1188 wake_up(&md->eventq); 1193 wake_up(&md->eventq);
@@ -1267,7 +1272,7 @@ static struct mapped_device *dm_find_md(dev_t dev)
1267 1272
1268 md = idr_find(&_minor_idr, minor); 1273 md = idr_find(&_minor_idr, minor);
1269 if (md && (md == MINOR_ALLOCED || 1274 if (md && (md == MINOR_ALLOCED ||
1270 (dm_disk(md)->first_minor != minor) || 1275 (MINOR(disk_devt(dm_disk(md))) != minor) ||
1271 test_bit(DMF_FREEING, &md->flags))) { 1276 test_bit(DMF_FREEING, &md->flags))) {
1272 md = NULL; 1277 md = NULL;
1273 goto out; 1278 goto out;
@@ -1318,7 +1323,8 @@ void dm_put(struct mapped_device *md)
1318 1323
1319 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { 1324 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
1320 map = dm_get_table(md); 1325 map = dm_get_table(md);
1321 idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); 1326 idr_replace(&_minor_idr, MINOR_ALLOCED,
1327 MINOR(disk_devt(dm_disk(md))));
1322 set_bit(DMF_FREEING, &md->flags); 1328 set_bit(DMF_FREEING, &md->flags);
1323 spin_unlock(&_minor_lock); 1329 spin_unlock(&_minor_lock);
1324 if (!dm_suspended(md)) { 1330 if (!dm_suspended(md)) {
@@ -1638,7 +1644,7 @@ out:
1638 *---------------------------------------------------------------*/ 1644 *---------------------------------------------------------------*/
1639void dm_kobject_uevent(struct mapped_device *md) 1645void dm_kobject_uevent(struct mapped_device *md)
1640{ 1646{
1641 kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); 1647 kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE);
1642} 1648}
1643 1649
1644uint32_t dm_next_uevent_seq(struct mapped_device *md) 1650uint32_t dm_next_uevent_seq(struct mapped_device *md)
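
Every md personality below (linear, multipath, raid0/1/10, raid5) gets the
same conversion as dm here: the old global disk_stat_* accounting becomes
per-cpu, bracketed by part_stat_lock()/part_stat_unlock(). The recurring
idiom, pulled out as a sketch (the patches open-code it at each site):

    static void account_bio(struct gendisk *disk, struct bio *bio)
    {
            const int rw = bio_data_dir(bio);
            int cpu;

            cpu = part_stat_lock();  /* pins the CPU, returns its id */
            part_stat_inc(cpu, &disk->part0, ios[rw]);
            part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio));
            part_stat_unlock();      /* re-enables preemption */
    }
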
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b1eebf88c209..b9cbee688fae 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -318,14 +318,18 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
318 mddev_t *mddev = q->queuedata; 318 mddev_t *mddev = q->queuedata;
319 dev_info_t *tmp_dev; 319 dev_info_t *tmp_dev;
320 sector_t block; 320 sector_t block;
321 int cpu;
321 322
322 if (unlikely(bio_barrier(bio))) { 323 if (unlikely(bio_barrier(bio))) {
323 bio_endio(bio, -EOPNOTSUPP); 324 bio_endio(bio, -EOPNOTSUPP);
324 return 0; 325 return 0;
325 } 326 }
326 327
327 disk_stat_inc(mddev->gendisk, ios[rw]); 328 cpu = part_stat_lock();
328 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 329 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
330 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
331 bio_sectors(bio));
332 part_stat_unlock();
329 333
330 tmp_dev = which_dev(mddev, bio->bi_sector); 334 tmp_dev = which_dev(mddev, bio->bi_sector);
331 block = bio->bi_sector >> 1; 335 block = bio->bi_sector >> 1;
@@ -349,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
349 * split it. 353 * split it.
350 */ 354 */
351 struct bio_pair *bp; 355 struct bio_pair *bp;
352 bp = bio_split(bio, bio_split_pool, 356 bp = bio_split(bio,
353 ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); 357 ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector);
354 if (linear_make_request(q, &bp->bio1)) 358 if (linear_make_request(q, &bp->bio1))
355 generic_make_request(&bp->bio1); 359 generic_make_request(&bp->bio1);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index deeac4b44173..0a3a4bdcd4af 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1464,10 +1464,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
1464 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) 1464 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
1465 goto fail; 1465 goto fail;
1466 1466
1467 if (rdev->bdev->bd_part) 1467 ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
1468 ko = &rdev->bdev->bd_part->dev.kobj;
1469 else
1470 ko = &rdev->bdev->bd_disk->dev.kobj;
1471 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { 1468 if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) {
1472 kobject_del(&rdev->kobj); 1469 kobject_del(&rdev->kobj);
1473 goto fail; 1470 goto fail;
@@ -3470,8 +3467,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
3470 disk->queue = mddev->queue; 3467 disk->queue = mddev->queue;
3471 add_disk(disk); 3468 add_disk(disk);
3472 mddev->gendisk = disk; 3469 mddev->gendisk = disk;
3473 error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, 3470 error = kobject_init_and_add(&mddev->kobj, &md_ktype,
3474 "%s", "md"); 3471 &disk_to_dev(disk)->kobj, "%s", "md");
3475 mutex_unlock(&disks_mutex); 3472 mutex_unlock(&disks_mutex);
3476 if (error) 3473 if (error)
3477 printk(KERN_WARNING "md: cannot register %s/md - name in use\n", 3474 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
@@ -3761,7 +3758,7 @@ static int do_md_run(mddev_t * mddev)
3761 sysfs_notify(&mddev->kobj, NULL, "array_state"); 3758 sysfs_notify(&mddev->kobj, NULL, "array_state");
3762 sysfs_notify(&mddev->kobj, NULL, "sync_action"); 3759 sysfs_notify(&mddev->kobj, NULL, "sync_action");
3763 sysfs_notify(&mddev->kobj, NULL, "degraded"); 3760 sysfs_notify(&mddev->kobj, NULL, "degraded");
3764 kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); 3761 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
3765 return 0; 3762 return 0;
3766} 3763}
3767 3764
@@ -5549,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev)
5549 rcu_read_lock(); 5546 rcu_read_lock();
5550 rdev_for_each_rcu(rdev, mddev) { 5547 rdev_for_each_rcu(rdev, mddev) {
5551 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; 5548 struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
5552 curr_events = disk_stat_read(disk, sectors[0]) + 5549 curr_events = part_stat_read(&disk->part0, sectors[0]) +
5553 disk_stat_read(disk, sectors[1]) - 5550 part_stat_read(&disk->part0, sectors[1]) -
5554 atomic_read(&disk->sync_io); 5551 atomic_read(&disk->sync_io);
5555 /* sync IO will cause sync_io to increase before the disk_stats 5552 /* sync IO will cause sync_io to increase before the disk_stats
5556 * as sync_io is counted when a request starts, and 5553 * as sync_io is counted when a request starts, and
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index c4779ccba1c3..8bb8794129b3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
147 struct multipath_bh * mp_bh; 147 struct multipath_bh * mp_bh;
148 struct multipath_info *multipath; 148 struct multipath_info *multipath;
149 const int rw = bio_data_dir(bio); 149 const int rw = bio_data_dir(bio);
150 int cpu;
150 151
151 if (unlikely(bio_barrier(bio))) { 152 if (unlikely(bio_barrier(bio))) {
152 bio_endio(bio, -EOPNOTSUPP); 153 bio_endio(bio, -EOPNOTSUPP);
@@ -158,8 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
158 mp_bh->master_bio = bio; 159 mp_bh->master_bio = bio;
159 mp_bh->mddev = mddev; 160 mp_bh->mddev = mddev;
160 161
161 disk_stat_inc(mddev->gendisk, ios[rw]); 162 cpu = part_stat_lock();
162 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 163 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
164 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
165 bio_sectors(bio));
166 part_stat_unlock();
163 167
164 mp_bh->path = multipath_map(conf); 168 mp_bh->path = multipath_map(conf);
165 if (mp_bh->path < 0) { 169 if (mp_bh->path < 0) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 183610635661..53508a8a981d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -399,14 +399,18 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
399 sector_t chunk; 399 sector_t chunk;
400 sector_t block, rsect; 400 sector_t block, rsect;
401 const int rw = bio_data_dir(bio); 401 const int rw = bio_data_dir(bio);
402 int cpu;
402 403
403 if (unlikely(bio_barrier(bio))) { 404 if (unlikely(bio_barrier(bio))) {
404 bio_endio(bio, -EOPNOTSUPP); 405 bio_endio(bio, -EOPNOTSUPP);
405 return 0; 406 return 0;
406 } 407 }
407 408
408 disk_stat_inc(mddev->gendisk, ios[rw]); 409 cpu = part_stat_lock();
409 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 410 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
411 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
412 bio_sectors(bio));
413 part_stat_unlock();
410 414
411 chunk_size = mddev->chunk_size >> 10; 415 chunk_size = mddev->chunk_size >> 10;
412 chunk_sects = mddev->chunk_size >> 9; 416 chunk_sects = mddev->chunk_size >> 9;
@@ -423,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio)
423 /* This is a one page bio that upper layers 427 /* This is a one page bio that upper layers
424 * refuse to split for us, so we need to split it. 428 * refuse to split for us, so we need to split it.
425 */ 429 */
426 bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 430 bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
427 if (raid0_make_request(q, &bp->bio1)) 431 if (raid0_make_request(q, &bp->bio1))
428 generic_make_request(&bp->bio1); 432 generic_make_request(&bp->bio1);
429 if (raid0_make_request(q, &bp->bio2)) 433 if (raid0_make_request(q, &bp->bio2))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 03a5ab705c20..b9764429d856 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
779 struct page **behind_pages = NULL; 779 struct page **behind_pages = NULL;
780 const int rw = bio_data_dir(bio); 780 const int rw = bio_data_dir(bio);
781 const int do_sync = bio_sync(bio); 781 const int do_sync = bio_sync(bio);
782 int do_barriers; 782 int cpu, do_barriers;
783 mdk_rdev_t *blocked_rdev; 783 mdk_rdev_t *blocked_rdev;
784 784
785 /* 785 /*
@@ -804,8 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio)
804 804
805 bitmap = mddev->bitmap; 805 bitmap = mddev->bitmap;
806 806
807 disk_stat_inc(mddev->gendisk, ios[rw]); 807 cpu = part_stat_lock();
808 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 808 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
809 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
810 bio_sectors(bio));
811 part_stat_unlock();
809 812
810 /* 813 /*
811 * make_request() can abort the operation when READA is being 814 * make_request() can abort the operation when READA is being
@@ -1302,9 +1305,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
1302 sbio->bi_size = r1_bio->sectors << 9; 1305 sbio->bi_size = r1_bio->sectors << 9;
1303 sbio->bi_idx = 0; 1306 sbio->bi_idx = 0;
1304 sbio->bi_phys_segments = 0; 1307 sbio->bi_phys_segments = 0;
1305 sbio->bi_hw_segments = 0;
1306 sbio->bi_hw_front_size = 0;
1307 sbio->bi_hw_back_size = 0;
1308 sbio->bi_flags &= ~(BIO_POOL_MASK - 1); 1308 sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1309 sbio->bi_flags |= 1 << BIO_UPTODATE; 1309 sbio->bi_flags |= 1 << BIO_UPTODATE;
1310 sbio->bi_next = NULL; 1310 sbio->bi_next = NULL;
@@ -1790,7 +1790,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1790 bio->bi_vcnt = 0; 1790 bio->bi_vcnt = 0;
1791 bio->bi_idx = 0; 1791 bio->bi_idx = 0;
1792 bio->bi_phys_segments = 0; 1792 bio->bi_phys_segments = 0;
1793 bio->bi_hw_segments = 0;
1794 bio->bi_size = 0; 1793 bio->bi_size = 0;
1795 bio->bi_end_io = NULL; 1794 bio->bi_end_io = NULL;
1796 bio->bi_private = NULL; 1795 bio->bi_private = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index e34cd0e62473..8bdc9bfc2887 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
789 mirror_info_t *mirror; 789 mirror_info_t *mirror;
790 r10bio_t *r10_bio; 790 r10bio_t *r10_bio;
791 struct bio *read_bio; 791 struct bio *read_bio;
792 int cpu;
792 int i; 793 int i;
793 int chunk_sects = conf->chunk_mask + 1; 794 int chunk_sects = conf->chunk_mask + 1;
794 const int rw = bio_data_dir(bio); 795 const int rw = bio_data_dir(bio);
@@ -816,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
816 /* This is a one page bio that upper layers 817 /* This is a one page bio that upper layers
817 * refuse to split for us, so we need to split it. 818 * refuse to split for us, so we need to split it.
818 */ 819 */
819 bp = bio_split(bio, bio_split_pool, 820 bp = bio_split(bio,
820 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); 821 chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
821 if (make_request(q, &bp->bio1)) 822 if (make_request(q, &bp->bio1))
822 generic_make_request(&bp->bio1); 823 generic_make_request(&bp->bio1);
@@ -843,8 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio)
843 */ 844 */
844 wait_barrier(conf); 845 wait_barrier(conf);
845 846
846 disk_stat_inc(mddev->gendisk, ios[rw]); 847 cpu = part_stat_lock();
847 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); 848 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
849 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
850 bio_sectors(bio));
851 part_stat_unlock();
848 852
849 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); 853 r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
850 854
@@ -1345,9 +1349,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
1345 tbio->bi_size = r10_bio->sectors << 9; 1349 tbio->bi_size = r10_bio->sectors << 9;
1346 tbio->bi_idx = 0; 1350 tbio->bi_idx = 0;
1347 tbio->bi_phys_segments = 0; 1351 tbio->bi_phys_segments = 0;
1348 tbio->bi_hw_segments = 0;
1349 tbio->bi_hw_front_size = 0;
1350 tbio->bi_hw_back_size = 0;
1351 tbio->bi_flags &= ~(BIO_POOL_MASK - 1); 1352 tbio->bi_flags &= ~(BIO_POOL_MASK - 1);
1352 tbio->bi_flags |= 1 << BIO_UPTODATE; 1353 tbio->bi_flags |= 1 << BIO_UPTODATE;
1353 tbio->bi_next = NULL; 1354 tbio->bi_next = NULL;
@@ -1947,7 +1948,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1947 bio->bi_vcnt = 0; 1948 bio->bi_vcnt = 0;
1948 bio->bi_idx = 0; 1949 bio->bi_idx = 0;
1949 bio->bi_phys_segments = 0; 1950 bio->bi_phys_segments = 0;
1950 bio->bi_hw_segments = 0;
1951 bio->bi_size = 0; 1951 bio->bi_size = 0;
1952 } 1952 }
1953 1953
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 224de022e7c5..ae16794bef20 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -101,6 +101,40 @@
101const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); 101const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
102#endif 102#endif
103 103
104/*
105 * We maintain a biased count of active stripes in the bottom 16 bits of
106 * bi_phys_segments, and a count of processed stripes in the upper 16 bits
107 */
108static inline int raid5_bi_phys_segments(struct bio *bio)
109{
110 return bio->bi_phys_segments & 0xffff;
111}
112
113static inline int raid5_bi_hw_segments(struct bio *bio)
114{
115 return (bio->bi_phys_segments >> 16) & 0xffff;
116}
117
118static inline int raid5_dec_bi_phys_segments(struct bio *bio)
119{
120 --bio->bi_phys_segments;
121 return raid5_bi_phys_segments(bio);
122}
123
124static inline int raid5_dec_bi_hw_segments(struct bio *bio)
125{
126 unsigned short val = raid5_bi_hw_segments(bio);
127
128 --val;
129 bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio);
130 return val;
131}
132
133static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt)
134{
135 bio->bi_phys_segments = raid5_bi_phys_segments(bio) | (cnt << 16);
136}
137
104static inline int raid6_next_disk(int disk, int raid_disks) 138static inline int raid6_next_disk(int disk, int raid_disks)
105{ 139{
106 disk++; 140 disk++;
@@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref)
507 while (rbi && rbi->bi_sector < 541 while (rbi && rbi->bi_sector <
508 dev->sector + STRIPE_SECTORS) { 542 dev->sector + STRIPE_SECTORS) {
509 rbi2 = r5_next_bio(rbi, dev->sector); 543 rbi2 = r5_next_bio(rbi, dev->sector);
510 if (--rbi->bi_phys_segments == 0) { 544 if (!raid5_dec_bi_phys_segments(rbi)) {
511 rbi->bi_next = return_bi; 545 rbi->bi_next = return_bi;
512 return_bi = rbi; 546 return_bi = rbi;
513 } 547 }
@@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1725 if (*bip) 1759 if (*bip)
1726 bi->bi_next = *bip; 1760 bi->bi_next = *bip;
1727 *bip = bi; 1761 *bip = bi;
1728 bi->bi_phys_segments ++; 1762 bi->bi_phys_segments++;
1729 spin_unlock_irq(&conf->device_lock); 1763 spin_unlock_irq(&conf->device_lock);
1730 spin_unlock(&sh->lock); 1764 spin_unlock(&sh->lock);
1731 1765
@@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
1819 sh->dev[i].sector + STRIPE_SECTORS) { 1853 sh->dev[i].sector + STRIPE_SECTORS) {
1820 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 1854 struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
1821 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1855 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1822 if (--bi->bi_phys_segments == 0) { 1856 if (!raid5_dec_bi_phys_segments(bi)) {
1823 md_write_end(conf->mddev); 1857 md_write_end(conf->mddev);
1824 bi->bi_next = *return_bi; 1858 bi->bi_next = *return_bi;
1825 *return_bi = bi; 1859 *return_bi = bi;
@@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
1834 sh->dev[i].sector + STRIPE_SECTORS) { 1868 sh->dev[i].sector + STRIPE_SECTORS) {
1835 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 1869 struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
1836 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1870 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1837 if (--bi->bi_phys_segments == 0) { 1871 if (!raid5_dec_bi_phys_segments(bi)) {
1838 md_write_end(conf->mddev); 1872 md_write_end(conf->mddev);
1839 bi->bi_next = *return_bi; 1873 bi->bi_next = *return_bi;
1840 *return_bi = bi; 1874 *return_bi = bi;
@@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh,
1858 struct bio *nextbi = 1892 struct bio *nextbi =
1859 r5_next_bio(bi, sh->dev[i].sector); 1893 r5_next_bio(bi, sh->dev[i].sector);
1860 clear_bit(BIO_UPTODATE, &bi->bi_flags); 1894 clear_bit(BIO_UPTODATE, &bi->bi_flags);
1861 if (--bi->bi_phys_segments == 0) { 1895 if (!raid5_dec_bi_phys_segments(bi)) {
1862 bi->bi_next = *return_bi; 1896 bi->bi_next = *return_bi;
1863 *return_bi = bi; 1897 *return_bi = bi;
1864 } 1898 }
@@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf,
2033 while (wbi && wbi->bi_sector < 2067 while (wbi && wbi->bi_sector <
2034 dev->sector + STRIPE_SECTORS) { 2068 dev->sector + STRIPE_SECTORS) {
2035 wbi2 = r5_next_bio(wbi, dev->sector); 2069 wbi2 = r5_next_bio(wbi, dev->sector);
2036 if (--wbi->bi_phys_segments == 0) { 2070 if (!raid5_dec_bi_phys_segments(wbi)) {
2037 md_write_end(conf->mddev); 2071 md_write_end(conf->mddev);
2038 wbi->bi_next = *return_bi; 2072 wbi->bi_next = *return_bi;
2039 *return_bi = wbi; 2073 *return_bi = wbi;
@@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
2814 copy_data(0, rbi, dev->page, dev->sector); 2848 copy_data(0, rbi, dev->page, dev->sector);
2815 rbi2 = r5_next_bio(rbi, dev->sector); 2849 rbi2 = r5_next_bio(rbi, dev->sector);
2816 spin_lock_irq(&conf->device_lock); 2850 spin_lock_irq(&conf->device_lock);
2817 if (--rbi->bi_phys_segments == 0) { 2851 if (!raid5_dec_bi_phys_segments(rbi)) {
2818 rbi->bi_next = return_bi; 2852 rbi->bi_next = return_bi;
2819 return_bi = rbi; 2853 return_bi = rbi;
2820 } 2854 }
@@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
3155 if(bi) { 3189 if(bi) {
3156 conf->retry_read_aligned_list = bi->bi_next; 3190 conf->retry_read_aligned_list = bi->bi_next;
3157 bi->bi_next = NULL; 3191 bi->bi_next = NULL;
3192 /*
 3193 * this sets the active stripe count to 1 and the processed
 3194 * stripe count to zero (upper 16 bits)
3195 */
3158 bi->bi_phys_segments = 1; /* biased count of active stripes */ 3196 bi->bi_phys_segments = 1; /* biased count of active stripes */
3159 bi->bi_hw_segments = 0; /* count of processed stripes */
3160 } 3197 }
3161 3198
3162 return bi; 3199 return bi;
@@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi)
3206 if ((bi->bi_size>>9) > q->max_sectors) 3243 if ((bi->bi_size>>9) > q->max_sectors)
3207 return 0; 3244 return 0;
3208 blk_recount_segments(q, bi); 3245 blk_recount_segments(q, bi);
3209 if (bi->bi_phys_segments > q->max_phys_segments || 3246 if (bi->bi_phys_segments > q->max_phys_segments)
3210 bi->bi_hw_segments > q->max_hw_segments)
3211 return 0; 3247 return 0;
3212 3248
3213 if (q->merge_bvec_fn) 3249 if (q->merge_bvec_fn)
@@ -3351,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3351 sector_t logical_sector, last_sector; 3387 sector_t logical_sector, last_sector;
3352 struct stripe_head *sh; 3388 struct stripe_head *sh;
3353 const int rw = bio_data_dir(bi); 3389 const int rw = bio_data_dir(bi);
3354 int remaining; 3390 int cpu, remaining;
3355 3391
3356 if (unlikely(bio_barrier(bi))) { 3392 if (unlikely(bio_barrier(bi))) {
3357 bio_endio(bi, -EOPNOTSUPP); 3393 bio_endio(bi, -EOPNOTSUPP);
@@ -3360,8 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi)
3360 3396
3361 md_write_start(mddev, bi); 3397 md_write_start(mddev, bi);
3362 3398
3363 disk_stat_inc(mddev->gendisk, ios[rw]); 3399 cpu = part_stat_lock();
3364 disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); 3400 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
3401 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
3402 bio_sectors(bi));
3403 part_stat_unlock();
3365 3404
3366 if (rw == READ && 3405 if (rw == READ &&
3367 mddev->reshape_position == MaxSector && 3406 mddev->reshape_position == MaxSector &&
@@ -3468,7 +3507,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3468 3507
3469 } 3508 }
3470 spin_lock_irq(&conf->device_lock); 3509 spin_lock_irq(&conf->device_lock);
3471 remaining = --bi->bi_phys_segments; 3510 remaining = raid5_dec_bi_phys_segments(bi);
3472 spin_unlock_irq(&conf->device_lock); 3511 spin_unlock_irq(&conf->device_lock);
3473 if (remaining == 0) { 3512 if (remaining == 0) {
3474 3513
@@ -3752,7 +3791,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3752 sector += STRIPE_SECTORS, 3791 sector += STRIPE_SECTORS,
3753 scnt++) { 3792 scnt++) {
3754 3793
3755 if (scnt < raid_bio->bi_hw_segments) 3794 if (scnt < raid5_bi_hw_segments(raid_bio))
3756 /* already done this stripe */ 3795 /* already done this stripe */
3757 continue; 3796 continue;
3758 3797
@@ -3760,7 +3799,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3760 3799
3761 if (!sh) { 3800 if (!sh) {
3762 /* failed to get a stripe - must wait */ 3801 /* failed to get a stripe - must wait */
3763 raid_bio->bi_hw_segments = scnt; 3802 raid5_set_bi_hw_segments(raid_bio, scnt);
3764 conf->retry_read_aligned = raid_bio; 3803 conf->retry_read_aligned = raid_bio;
3765 return handled; 3804 return handled;
3766 } 3805 }
@@ -3768,7 +3807,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3768 set_bit(R5_ReadError, &sh->dev[dd_idx].flags); 3807 set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
3769 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { 3808 if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
3770 release_stripe(sh); 3809 release_stripe(sh);
3771 raid_bio->bi_hw_segments = scnt; 3810 raid5_set_bi_hw_segments(raid_bio, scnt);
3772 conf->retry_read_aligned = raid_bio; 3811 conf->retry_read_aligned = raid_bio;
3773 return handled; 3812 return handled;
3774 } 3813 }
@@ -3778,7 +3817,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
3778 handled++; 3817 handled++;
3779 } 3818 }
3780 spin_lock_irq(&conf->device_lock); 3819 spin_lock_irq(&conf->device_lock);
3781 remaining = --raid_bio->bi_phys_segments; 3820 remaining = raid5_dec_bi_phys_segments(raid_bio);
3782 spin_unlock_irq(&conf->device_lock); 3821 spin_unlock_irq(&conf->device_lock);
3783 if (remaining == 0) 3822 if (remaining == 0)
3784 bio_endio(raid_bio, 0); 3823 bio_endio(raid_bio, 0);
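
The raid5 change frees bi_hw_segments for removal by packing both counters
into bi_phys_segments: active stripes in the low 16 bits, processed stripes
in the high 16. A standalone check of the packing arithmetic (the set helper
must use bitwise '|'; logical '||' would collapse the low half to 0 or 1):

    #include <assert.h>

    static unsigned int phys_seg(unsigned int v) { return v & 0xffff; }
    static unsigned int hw_seg(unsigned int v)   { return (v >> 16) & 0xffff; }

    static unsigned int set_hw_seg(unsigned int v, unsigned int cnt)
    {
            return phys_seg(v) | (cnt << 16);
    }

    int main(void)
    {
            unsigned int v = 1;      /* biased active-stripe count */

            v = set_hw_seg(v, 5);    /* record 5 processed stripes */
            assert(phys_seg(v) == 1 && hw_seg(v) == 5);
            return 0;
    }
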
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index d2d2318dafa4..6e291bf8237a 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp)
197static int mspro_block_disk_release(struct gendisk *disk) 197static int mspro_block_disk_release(struct gendisk *disk)
198{ 198{
199 struct mspro_block_data *msb = disk->private_data; 199 struct mspro_block_data *msb = disk->private_data;
200 int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; 200 int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT;
201 201
202 mutex_lock(&mspro_block_disk_lock); 202 mutex_lock(&mspro_block_disk_lock);
203 203
@@ -828,7 +828,7 @@ static void mspro_block_submit_req(struct request_queue *q)
828 828
829 if (msb->eject) { 829 if (msb->eject) {
830 while ((req = elv_next_request(q)) != NULL) 830 while ((req = elv_next_request(q)) != NULL)
831 end_queued_request(req, -ENODEV); 831 __blk_end_request(req, -ENODEV, blk_rq_bytes(req));
832 832
833 return; 833 return;
834 } 834 }
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ebc8b9d77613..97156b689e82 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -83,7 +83,7 @@ static void mmc_blk_put(struct mmc_blk_data *md)
83 mutex_lock(&open_lock); 83 mutex_lock(&open_lock);
84 md->usage--; 84 md->usage--;
85 if (md->usage == 0) { 85 if (md->usage == 0) {
86 int devidx = md->disk->first_minor >> MMC_SHIFT; 86 int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT;
87 __clear_bit(devidx, dev_use); 87 __clear_bit(devidx, dev_use);
88 88
89 put_disk(md->disk); 89 put_disk(md->disk);
diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index f34f20c78911..9bf581c4f740 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blktrans_dev *dev,
1005 return ftl_write((void *)dev, buf, block, 1); 1005 return ftl_write((void *)dev, buf, block, 1);
1006} 1006}
1007 1007
1008static int ftl_discardsect(struct mtd_blktrans_dev *dev,
1009 unsigned long sector, unsigned nr_sects)
1010{
1011 partition_t *part = (void *)dev;
1012 uint32_t bsize = 1 << part->header.EraseUnitSize;
1013
 1014 DEBUG(1, "FTL erase sector %lu for %u sectors\n",
1015 sector, nr_sects);
1016
1017 while (nr_sects) {
1018 uint32_t old_addr = part->VirtualBlockMap[sector];
1019 if (old_addr != 0xffffffff) {
1020 part->VirtualBlockMap[sector] = 0xffffffff;
1021 part->EUNInfo[old_addr/bsize].Deleted++;
1022 if (set_bam_entry(part, old_addr, 0))
1023 return -EIO;
1024 }
1025 nr_sects--;
1026 sector++;
1027 }
1028
1029 return 0;
1030}
1008/*====================================================================*/ 1031/*====================================================================*/
1009 1032
1010static void ftl_freepart(partition_t *part) 1033static void ftl_freepart(partition_t *part)
@@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = {
1069 .blksize = SECTOR_SIZE, 1092 .blksize = SECTOR_SIZE,
1070 .readsect = ftl_readsect, 1093 .readsect = ftl_readsect,
1071 .writesect = ftl_writesect, 1094 .writesect = ftl_writesect,
1095 .discard = ftl_discardsect,
1072 .getgeo = ftl_getgeo, 1096 .getgeo = ftl_getgeo,
1073 .add_mtd = ftl_add_mtd, 1097 .add_mtd = ftl_add_mtd,
1074 .remove_dev = ftl_remove_dev, 1098 .remove_dev = ftl_remove_dev,
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 9ff007c4962c..681d5aca2af4 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,6 +32,14 @@ struct mtd_blkcore_priv {
32 spinlock_t queue_lock; 32 spinlock_t queue_lock;
33}; 33};
34 34
35static int blktrans_discard_request(struct request_queue *q,
36 struct request *req)
37{
38 req->cmd_type = REQ_TYPE_LINUX_BLOCK;
39 req->cmd[0] = REQ_LB_OP_DISCARD;
40 return 0;
41}
42
35static int do_blktrans_request(struct mtd_blktrans_ops *tr, 43static int do_blktrans_request(struct mtd_blktrans_ops *tr,
36 struct mtd_blktrans_dev *dev, 44 struct mtd_blktrans_dev *dev,
37 struct request *req) 45 struct request *req)
@@ -44,6 +52,10 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
44 52
45 buf = req->buffer; 53 buf = req->buffer;
46 54
55 if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
56 req->cmd[0] == REQ_LB_OP_DISCARD)
57 return !tr->discard(dev, block, nsect);
58
47 if (!blk_fs_request(req)) 59 if (!blk_fs_request(req))
48 return 0; 60 return 0;
49 61
@@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
367 379
368 tr->blkcore_priv->rq->queuedata = tr; 380 tr->blkcore_priv->rq->queuedata = tr;
369 blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); 381 blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize);
382 if (tr->discard)
383 blk_queue_set_discard(tr->blkcore_priv->rq,
384 blktrans_discard_request);
385
370 tr->blkshift = ffs(tr->blksize) - 1; 386 tr->blkshift = ffs(tr->blksize) - 1;
371 387
372 tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, 388 tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
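
Putting the two mtd hunks together: the prepare hook registered through
blk_queue_set_discard() retags an incoming discard as
REQ_TYPE_LINUX_BLOCK/REQ_LB_OP_DISCARD, and the transport thread then routes
it to the translation layer's ->discard() method ahead of the normal
read/write handling. Condensed flow, using the 2.6.28 request field names:

    static int do_blktrans_discard(struct mtd_blktrans_ops *tr,
                                   struct mtd_blktrans_dev *dev,
                                   struct request *req)
    {
            unsigned long block = req->sector << 9 >> tr->blkshift;
            unsigned long nsect = req->current_nr_sectors << 9 >> tr->blkshift;

            if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
                req->cmd[0] == REQ_LB_OP_DISCARD)
                    return !tr->discard(dev, block, nsect); /* nonzero = done */

            return 0;   /* not a discard; caller handles read/write */
    }
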
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 03c0e40a92ff..e3b5c4d3036e 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v)
76 /* Print kdev. */ 76 /* Print kdev. */
77 if (block->gdp) 77 if (block->gdp)
78 seq_printf(m, " at (%3d:%6d)", 78 seq_printf(m, " at (%3d:%6d)",
79 block->gdp->major, block->gdp->first_minor); 79 MAJOR(disk_devt(block->gdp)),
80 MINOR(disk_devt(block->gdp)));
80 else 81 else
81 seq_printf(m, " at (???:??????)"); 82 seq_printf(m, " at (???:??????)");
82 /* Print device name. */ 83 /* Print device name. */
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 711b3004b3e6..9481e4a3f76e 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info)
114 found = 0; 114 found = 0;
115 // test if minor available 115 // test if minor available
116 list_for_each_entry(entry, &dcssblk_devices, lh) 116 list_for_each_entry(entry, &dcssblk_devices, lh)
117 if (minor == entry->gd->first_minor) 117 if (minor == MINOR(disk_devt(entry->gd)))
118 found++; 118 found++;
119 if (!found) break; // got unused minor 119 if (!found) break; // got unused minor
120 } 120 }
@@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
397 goto unload_seg; 397 goto unload_seg;
398 } 398 }
399 sprintf(dev_info->gd->disk_name, "dcssblk%d", 399 sprintf(dev_info->gd->disk_name, "dcssblk%d",
400 dev_info->gd->first_minor); 400 MINOR(disk_devt(dev_info->gd)));
401 list_add_tail(&dev_info->lh, &dcssblk_devices); 401 list_add_tail(&dev_info->lh, &dcssblk_devices);
402 402
403 if (!try_module_get(THIS_MODULE)) { 403 if (!try_module_get(THIS_MODULE)) {
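
The dasd_proc and dcssblk hunks are instances of one mechanical conversion: with extended devt numbers a disk's dev_t can no longer be assembled by hand from gendisk->major and first_minor, so call sites go through disk_devt() plus the MAJOR()/MINOR() macros. A minimal sketch of the pattern (print_disk_devt() is a hypothetical helper):

    #include <linux/genhd.h>
    #include <linux/kdev_t.h>

    static void print_disk_devt(struct gendisk *gd)
    {
            dev_t devt = disk_devt(gd); /* canonical number of the whole disk */

            printk(KERN_INFO "%s is (%d:%d)\n",
                   gd->disk_name, MAJOR(devt), MINOR(devt));
    }
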
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index aa4e77c25273..8abfd06b5a72 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -1139,7 +1139,7 @@ static struct aac_srb * aac_scsi_common(struct fib * fib, struct scsi_cmnd * cmd
1139 srbcmd->id = cpu_to_le32(scmd_id(cmd)); 1139 srbcmd->id = cpu_to_le32(scmd_id(cmd));
1140 srbcmd->lun = cpu_to_le32(cmd->device->lun); 1140 srbcmd->lun = cpu_to_le32(cmd->device->lun);
1141 srbcmd->flags = cpu_to_le32(flag); 1141 srbcmd->flags = cpu_to_le32(flag);
1142 timeout = cmd->timeout_per_command/HZ; 1142 timeout = cmd->request->timeout/HZ;
1143 if (timeout == 0) 1143 if (timeout == 0)
1144 timeout = 1; 1144 timeout = 1;
1145 srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds 1145 srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds
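
This aacraid hunk is the first of many identical conversions in this series (ibmvscsi, ide-scsi, ips, ncr53c8xx, qla1280 and qla4xxx follow below): scsi_cmnd->timeout_per_command is removed, and the timeout, in jiffies, now lives on the request and is armed by the block layer. A sketch of the read side; lld_timeout_secs() is a hypothetical helper, not part of the patch:

    #include <scsi/scsi_cmnd.h>
    #include <linux/blkdev.h>

    static u32 lld_timeout_secs(struct scsi_cmnd *cmd)
    {
            u32 timeout = cmd->request->timeout / HZ; /* jiffies to seconds */

            return timeout ? timeout : 1; /* firmware commonly needs >= 1s */
    }
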
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index 822d5214692b..c387c15a2128 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -464,7 +464,6 @@ int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, char *cmnd,
464 464
465 /* use request field to save the ptr. to completion struct. */ 465 /* use request field to save the ptr. to completion struct. */
466 scp->request = (struct request *)&wait; 466 scp->request = (struct request *)&wait;
467 scp->timeout_per_command = timeout*HZ;
468 scp->cmd_len = 12; 467 scp->cmd_len = 12;
469 scp->cmnd = cmnd; 468 scp->cmnd = cmnd;
470 cmndinfo.priority = IOCTL_PRI; 469 cmndinfo.priority = IOCTL_PRI;
@@ -1995,23 +1994,12 @@ static void gdth_putq(gdth_ha_str *ha, Scsi_Cmnd *scp, unchar priority)
1995 register Scsi_Cmnd *pscp; 1994 register Scsi_Cmnd *pscp;
1996 register Scsi_Cmnd *nscp; 1995 register Scsi_Cmnd *nscp;
1997 ulong flags; 1996 ulong flags;
1998 unchar b, t;
1999 1997
2000 TRACE(("gdth_putq() priority %d\n",priority)); 1998 TRACE(("gdth_putq() priority %d\n",priority));
2001 spin_lock_irqsave(&ha->smp_lock, flags); 1999 spin_lock_irqsave(&ha->smp_lock, flags);
2002 2000
2003 if (!cmndinfo->internal_command) { 2001 if (!cmndinfo->internal_command)
2004 cmndinfo->priority = priority; 2002 cmndinfo->priority = priority;
2005 b = scp->device->channel;
2006 t = scp->device->id;
2007 if (priority >= DEFAULT_PRI) {
2008 if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) ||
2009 (b==ha->virt_bus && t<MAX_HDRIVES && ha->hdr[t].lock)) {
2010 TRACE2(("gdth_putq(): locked IO ->update_timeout()\n"));
2011 cmndinfo->timeout = gdth_update_timeout(scp, 0);
2012 }
2013 }
2014 }
2015 2003
2016 if (ha->req_first==NULL) { 2004 if (ha->req_first==NULL) {
2017 ha->req_first = scp; /* queue was empty */ 2005 ha->req_first = scp; /* queue was empty */
@@ -3899,6 +3887,39 @@ static const char *gdth_info(struct Scsi_Host *shp)
3899 return ((const char *)ha->binfo.type_string); 3887 return ((const char *)ha->binfo.type_string);
3900} 3888}
3901 3889
3890static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp)
3891{
3892 gdth_ha_str *ha = shost_priv(scp->device->host);
3893 struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
3894 unchar b, t;
3895 ulong flags;
3896 enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED;
3897
 3898 TRACE(("%s() cmd 0x%x\n", __func__, scp->cmnd[0]));
3899 b = scp->device->channel;
3900 t = scp->device->id;
3901
3902 /*
 3903 * We don't really honor the command timeout; instead we allow
 3904 * up to six times the actual command timeout, so reset the
 3905 * timer as long as this command has timed out fewer than six times.
3906 */
3907 if (++cmndinfo->timeout_count < 6)
3908 retval = BLK_EH_RESET_TIMER;
3909
3910 /* Reset the timeout if it is locked IO */
3911 spin_lock_irqsave(&ha->smp_lock, flags);
3912 if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) ||
3913 (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) {
3914 TRACE2(("%s(): locked IO, reset timeout\n", __func__));
3915 retval = BLK_EH_RESET_TIMER;
3916 }
3917 spin_unlock_irqrestore(&ha->smp_lock, flags);
3918
3919 return retval;
3920}
3921
3922
3902static int gdth_eh_bus_reset(Scsi_Cmnd *scp) 3923static int gdth_eh_bus_reset(Scsi_Cmnd *scp)
3903{ 3924{
3904 gdth_ha_str *ha = shost_priv(scp->device->host); 3925 gdth_ha_str *ha = shost_priv(scp->device->host);
@@ -3992,7 +4013,7 @@ static int gdth_queuecommand(struct scsi_cmnd *scp,
3992 BUG_ON(!cmndinfo); 4013 BUG_ON(!cmndinfo);
3993 4014
3994 scp->scsi_done = done; 4015 scp->scsi_done = done;
3995 gdth_update_timeout(scp, scp->timeout_per_command * 6); 4016 cmndinfo->timeout_count = 0;
3996 cmndinfo->priority = DEFAULT_PRI; 4017 cmndinfo->priority = DEFAULT_PRI;
3997 4018
3998 return __gdth_queuecommand(ha, scp, cmndinfo); 4019 return __gdth_queuecommand(ha, scp, cmndinfo);
@@ -4096,12 +4117,10 @@ static int ioc_lockdrv(void __user *arg)
4096 ha->hdr[j].lock = 1; 4117 ha->hdr[j].lock = 1;
4097 spin_unlock_irqrestore(&ha->smp_lock, flags); 4118 spin_unlock_irqrestore(&ha->smp_lock, flags);
4098 gdth_wait_completion(ha, ha->bus_cnt, j); 4119 gdth_wait_completion(ha, ha->bus_cnt, j);
4099 gdth_stop_timeout(ha, ha->bus_cnt, j);
4100 } else { 4120 } else {
4101 spin_lock_irqsave(&ha->smp_lock, flags); 4121 spin_lock_irqsave(&ha->smp_lock, flags);
4102 ha->hdr[j].lock = 0; 4122 ha->hdr[j].lock = 0;
4103 spin_unlock_irqrestore(&ha->smp_lock, flags); 4123 spin_unlock_irqrestore(&ha->smp_lock, flags);
4104 gdth_start_timeout(ha, ha->bus_cnt, j);
4105 gdth_next(ha); 4124 gdth_next(ha);
4106 } 4125 }
4107 } 4126 }
@@ -4539,18 +4558,14 @@ static int gdth_ioctl(struct inode *inode, struct file *filep,
4539 spin_lock_irqsave(&ha->smp_lock, flags); 4558 spin_lock_irqsave(&ha->smp_lock, flags);
4540 ha->raw[i].lock = 1; 4559 ha->raw[i].lock = 1;
4541 spin_unlock_irqrestore(&ha->smp_lock, flags); 4560 spin_unlock_irqrestore(&ha->smp_lock, flags);
4542 for (j = 0; j < ha->tid_cnt; ++j) { 4561 for (j = 0; j < ha->tid_cnt; ++j)
4543 gdth_wait_completion(ha, i, j); 4562 gdth_wait_completion(ha, i, j);
4544 gdth_stop_timeout(ha, i, j);
4545 }
4546 } else { 4563 } else {
4547 spin_lock_irqsave(&ha->smp_lock, flags); 4564 spin_lock_irqsave(&ha->smp_lock, flags);
4548 ha->raw[i].lock = 0; 4565 ha->raw[i].lock = 0;
4549 spin_unlock_irqrestore(&ha->smp_lock, flags); 4566 spin_unlock_irqrestore(&ha->smp_lock, flags);
4550 for (j = 0; j < ha->tid_cnt; ++j) { 4567 for (j = 0; j < ha->tid_cnt; ++j)
4551 gdth_start_timeout(ha, i, j);
4552 gdth_next(ha); 4568 gdth_next(ha);
4553 }
4554 } 4569 }
4555 } 4570 }
4556 break; 4571 break;
@@ -4644,6 +4659,7 @@ static struct scsi_host_template gdth_template = {
4644 .slave_configure = gdth_slave_configure, 4659 .slave_configure = gdth_slave_configure,
4645 .bios_param = gdth_bios_param, 4660 .bios_param = gdth_bios_param,
4646 .proc_info = gdth_proc_info, 4661 .proc_info = gdth_proc_info,
4662 .eh_timed_out = gdth_timed_out,
4647 .proc_name = "gdth", 4663 .proc_name = "gdth",
4648 .can_queue = GDTH_MAXCMDS, 4664 .can_queue = GDTH_MAXCMDS,
4649 .this_id = -1, 4665 .this_id = -1,
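
With the per-command kernel timer gone, gdth can no longer stop and restart timers on queued commands, so it registers .eh_timed_out instead and keeps a per-command counter, extending the deadline a bounded number of times before SCSI error handling takes over. The generic shape of that pattern, as a sketch with hypothetical lld_* names:

    struct lld_cmndinfo {
            int timeout_count; /* incremented on every timeout event */
    };

    static enum blk_eh_timer_return lld_timed_out(struct scsi_cmnd *scp)
    {
            struct lld_cmndinfo *ci = lld_cmnd_priv(scp); /* hypothetical accessor */

            if (++ci->timeout_count < 6)
                    return BLK_EH_RESET_TIMER; /* push the deadline out again */
            return BLK_EH_NOT_HANDLED;         /* let SCSI EH take over */
    }
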
diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h
index ca92476727cf..1646444e9bd5 100644
--- a/drivers/scsi/gdth.h
+++ b/drivers/scsi/gdth.h
@@ -916,7 +916,7 @@ typedef struct {
916 gdth_cmd_str *internal_cmd_str; /* carrier for internal messages */ 916 gdth_cmd_str *internal_cmd_str; /* carrier for internal messages */
917 dma_addr_t sense_paddr; /* sense dma-addr */ 917 dma_addr_t sense_paddr; /* sense dma-addr */
918 unchar priority; 918 unchar priority;
919 int timeout; 919 int timeout_count; /* # of timeout calls */
920 volatile int wait_for_completion; 920 volatile int wait_for_completion;
921 ushort status; 921 ushort status;
922 ulong32 info; 922 ulong32 info;
diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c
index ce0228e26aec..59349a316e13 100644
--- a/drivers/scsi/gdth_proc.c
+++ b/drivers/scsi/gdth_proc.c
@@ -748,69 +748,3 @@ static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id)
748 } 748 }
749 spin_unlock_irqrestore(&ha->smp_lock, flags); 749 spin_unlock_irqrestore(&ha->smp_lock, flags);
750} 750}
751
752static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id)
753{
754 ulong flags;
755 Scsi_Cmnd *scp;
756 unchar b, t;
757
758 spin_lock_irqsave(&ha->smp_lock, flags);
759
760 for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
761 struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
762 if (!cmndinfo->internal_command) {
763 b = scp->device->channel;
764 t = scp->device->id;
765 if (t == (unchar)id && b == (unchar)busnum) {
766 TRACE2(("gdth_stop_timeout(): update_timeout()\n"));
767 cmndinfo->timeout = gdth_update_timeout(scp, 0);
768 }
769 }
770 }
771 spin_unlock_irqrestore(&ha->smp_lock, flags);
772}
773
774static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id)
775{
776 ulong flags;
777 Scsi_Cmnd *scp;
778 unchar b, t;
779
780 spin_lock_irqsave(&ha->smp_lock, flags);
781
782 for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) {
783 struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp);
784 if (!cmndinfo->internal_command) {
785 b = scp->device->channel;
786 t = scp->device->id;
787 if (t == (unchar)id && b == (unchar)busnum) {
788 TRACE2(("gdth_start_timeout(): update_timeout()\n"));
789 gdth_update_timeout(scp, cmndinfo->timeout);
790 }
791 }
792 }
793 spin_unlock_irqrestore(&ha->smp_lock, flags);
794}
795
796static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout)
797{
798 int oldto;
799
800 oldto = scp->timeout_per_command;
801 scp->timeout_per_command = timeout;
802
803 if (timeout == 0) {
804 del_timer(&scp->eh_timeout);
805 scp->eh_timeout.data = (unsigned long) NULL;
806 scp->eh_timeout.expires = 0;
807 } else {
808 if (scp->eh_timeout.data != (unsigned long) NULL)
809 del_timer(&scp->eh_timeout);
810 scp->eh_timeout.data = (unsigned long) scp;
811 scp->eh_timeout.expires = jiffies + timeout;
812 add_timer(&scp->eh_timeout);
813 }
814
815 return oldto;
816}
diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h
index 45e6fdacf36e..9b900cc9ebe8 100644
--- a/drivers/scsi/gdth_proc.h
+++ b/drivers/scsi/gdth_proc.h
@@ -20,9 +20,6 @@ static char *gdth_ioctl_alloc(gdth_ha_str *ha, int size, int scratch,
20 ulong64 *paddr); 20 ulong64 *paddr);
21static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr); 21static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr);
22static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id); 22static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id);
23static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id);
24static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id);
25static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout);
26 23
27#endif 24#endif
28 25
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 7b1502c0ab6e..87e09f35d3d4 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -756,7 +756,7 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd,
756 init_event_struct(evt_struct, 756 init_event_struct(evt_struct,
757 handle_cmd_rsp, 757 handle_cmd_rsp,
758 VIOSRP_SRP_FORMAT, 758 VIOSRP_SRP_FORMAT,
759 cmnd->timeout_per_command/HZ); 759 cmnd->request->timeout/HZ);
760 760
761 evt_struct->cmnd = cmnd; 761 evt_struct->cmnd = cmnd;
762 evt_struct->cmnd_done = done; 762 evt_struct->cmnd_done = done;
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 461331d3dc45..81c16cba5417 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -612,7 +612,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd,
612 pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); 612 pc->req_xfer = pc->buf_size = scsi_bufflen(cmd);
613 pc->scsi_cmd = cmd; 613 pc->scsi_cmd = cmd;
614 pc->done = done; 614 pc->done = done;
615 pc->timeout = jiffies + cmd->timeout_per_command; 615 pc->timeout = jiffies + cmd->request->timeout;
616 616
617 if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { 617 if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
618 printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); 618 printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index e7a3a6554425..d30eb7ba018e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3670,7 +3670,8 @@ static int ipr_slave_configure(struct scsi_device *sdev)
3670 sdev->no_uld_attach = 1; 3670 sdev->no_uld_attach = 1;
3671 } 3671 }
3672 if (ipr_is_vset_device(res)) { 3672 if (ipr_is_vset_device(res)) {
3673 sdev->timeout = IPR_VSET_RW_TIMEOUT; 3673 blk_queue_rq_timeout(sdev->request_queue,
3674 IPR_VSET_RW_TIMEOUT);
3674 blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS); 3675 blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
3675 } 3676 }
3676 if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res)) 3677 if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res))
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index bc9e6ddf41df..ef683f0d2b5a 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -3818,7 +3818,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb)
3818 scb->cmd.dcdb.segment_4G = 0; 3818 scb->cmd.dcdb.segment_4G = 0;
3819 scb->cmd.dcdb.enhanced_sg = 0; 3819 scb->cmd.dcdb.enhanced_sg = 0;
3820 3820
3821 TimeOut = scb->scsi_cmd->timeout_per_command; 3821 TimeOut = scb->scsi_cmd->request->timeout;
3822 3822
3823 if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ 3823 if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */
3824 if (!scb->sg_len) { 3824 if (!scb->sg_len) {
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 299e075a7b34..1eca82420aab 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1476,12 +1476,12 @@ static void iscsi_start_tx(struct iscsi_conn *conn)
1476 scsi_queue_work(conn->session->host, &conn->xmitwork); 1476 scsi_queue_work(conn->session->host, &conn->xmitwork);
1477} 1477}
1478 1478
1479static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) 1479static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
1480{ 1480{
1481 struct iscsi_cls_session *cls_session; 1481 struct iscsi_cls_session *cls_session;
1482 struct iscsi_session *session; 1482 struct iscsi_session *session;
1483 struct iscsi_conn *conn; 1483 struct iscsi_conn *conn;
1484 enum scsi_eh_timer_return rc = EH_NOT_HANDLED; 1484 enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
1485 1485
1486 cls_session = starget_to_session(scsi_target(scmd->device)); 1486 cls_session = starget_to_session(scsi_target(scmd->device));
1487 session = cls_session->dd_data; 1487 session = cls_session->dd_data;
@@ -1494,14 +1494,14 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
1494 * We are probably in the middle of iscsi recovery so let 1494 * We are probably in the middle of iscsi recovery so let
1495 * that complete and handle the error. 1495 * that complete and handle the error.
1496 */ 1496 */
1497 rc = EH_RESET_TIMER; 1497 rc = BLK_EH_RESET_TIMER;
1498 goto done; 1498 goto done;
1499 } 1499 }
1500 1500
1501 conn = session->leadconn; 1501 conn = session->leadconn;
1502 if (!conn) { 1502 if (!conn) {
 1503 /* In the middle of shutting down */ 1503 /* In the middle of shutting down */
1504 rc = EH_RESET_TIMER; 1504 rc = BLK_EH_RESET_TIMER;
1505 goto done; 1505 goto done;
1506 } 1506 }
1507 1507
@@ -1513,20 +1513,21 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
1513 */ 1513 */
1514 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + 1514 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
1515 (conn->ping_timeout * HZ), jiffies)) 1515 (conn->ping_timeout * HZ), jiffies))
1516 rc = EH_RESET_TIMER; 1516 rc = BLK_EH_RESET_TIMER;
1517 /* 1517 /*
1518 * if we are about to check the transport then give the command 1518 * if we are about to check the transport then give the command
1519 * more time 1519 * more time
1520 */ 1520 */
1521 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ), 1521 if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
1522 jiffies)) 1522 jiffies))
1523 rc = EH_RESET_TIMER; 1523 rc = BLK_EH_RESET_TIMER;
1524 /* if in the middle of checking the transport then give us more time */ 1524 /* if in the middle of checking the transport then give us more time */
1525 if (conn->ping_task) 1525 if (conn->ping_task)
1526 rc = EH_RESET_TIMER; 1526 rc = BLK_EH_RESET_TIMER;
1527done: 1527done:
1528 spin_unlock(&session->lock); 1528 spin_unlock(&session->lock);
1529 debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh"); 1529 debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ?
1530 "timer reset" : "nh");
1530 return rc; 1531 return rc;
1531} 1532}
1532 1533
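
The libiscsi hunks above, like the libsas, megaraid and fc_transport hunks below, are one rename: the SCSI-private scsi_eh_timer_return enum becomes blk_eh_timer_return from <linux/blkdev.h>, so the block layer can drive command timers directly. The mapping is one-to-one:

    EH_HANDLED     -> BLK_EH_HANDLED      /* the driver completed the command */
    EH_RESET_TIMER -> BLK_EH_RESET_TIMER  /* restart the request's timer */
    EH_NOT_HANDLED -> BLK_EH_NOT_HANDLED  /* fall through to normal error handling */
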
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index e1872989710a..e15501170698 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -398,7 +398,7 @@ void sas_ata_task_abort(struct sas_task *task)
398 398
399 /* Bounce SCSI-initiated commands to the SCSI EH */ 399 /* Bounce SCSI-initiated commands to the SCSI EH */
400 if (qc->scsicmd) { 400 if (qc->scsicmd) {
401 scsi_req_abort_cmd(qc->scsicmd); 401 blk_abort_request(qc->scsicmd->request);
402 scsi_schedule_eh(qc->scsicmd->device->host); 402 scsi_schedule_eh(qc->scsicmd->device->host);
403 return; 403 return;
404 } 404 }
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index b4f9368f116a..0001374bd6b2 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -55,7 +55,7 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha);
55int sas_register_ports(struct sas_ha_struct *sas_ha); 55int sas_register_ports(struct sas_ha_struct *sas_ha);
56void sas_unregister_ports(struct sas_ha_struct *sas_ha); 56void sas_unregister_ports(struct sas_ha_struct *sas_ha);
57 57
58enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); 58enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *);
59 59
60int sas_init_queue(struct sas_ha_struct *sas_ha); 60int sas_init_queue(struct sas_ha_struct *sas_ha);
61int sas_init_events(struct sas_ha_struct *sas_ha); 61int sas_init_events(struct sas_ha_struct *sas_ha);
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index a8e3ef309070..744838780ada 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -673,43 +673,43 @@ out:
673 return; 673 return;
674} 674}
675 675
676enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) 676enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
677{ 677{
678 struct sas_task *task = TO_SAS_TASK(cmd); 678 struct sas_task *task = TO_SAS_TASK(cmd);
679 unsigned long flags; 679 unsigned long flags;
680 680
681 if (!task) { 681 if (!task) {
682 cmd->timeout_per_command /= 2; 682 cmd->request->timeout /= 2;
683 SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", 683 SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n",
684 cmd, task, (cmd->timeout_per_command ? 684 cmd, task, (cmd->request->timeout ?
685 "EH_RESET_TIMER" : "EH_NOT_HANDLED")); 685 "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED"));
686 if (!cmd->timeout_per_command) 686 if (!cmd->request->timeout)
687 return EH_NOT_HANDLED; 687 return BLK_EH_NOT_HANDLED;
688 return EH_RESET_TIMER; 688 return BLK_EH_RESET_TIMER;
689 } 689 }
690 690
691 spin_lock_irqsave(&task->task_state_lock, flags); 691 spin_lock_irqsave(&task->task_state_lock, flags);
692 BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); 692 BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED);
693 if (task->task_state_flags & SAS_TASK_STATE_DONE) { 693 if (task->task_state_flags & SAS_TASK_STATE_DONE) {
694 spin_unlock_irqrestore(&task->task_state_lock, flags); 694 spin_unlock_irqrestore(&task->task_state_lock, flags);
695 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", 695 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: "
696 cmd, task); 696 "BLK_EH_HANDLED\n", cmd, task);
697 return EH_HANDLED; 697 return BLK_EH_HANDLED;
698 } 698 }
699 if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { 699 if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) {
700 spin_unlock_irqrestore(&task->task_state_lock, flags); 700 spin_unlock_irqrestore(&task->task_state_lock, flags);
701 SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " 701 SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: "
702 "EH_RESET_TIMER\n", 702 "BLK_EH_RESET_TIMER\n",
703 cmd, task); 703 cmd, task);
704 return EH_RESET_TIMER; 704 return BLK_EH_RESET_TIMER;
705 } 705 }
706 task->task_state_flags |= SAS_TASK_STATE_ABORTED; 706 task->task_state_flags |= SAS_TASK_STATE_ABORTED;
707 spin_unlock_irqrestore(&task->task_state_lock, flags); 707 spin_unlock_irqrestore(&task->task_state_lock, flags);
708 708
709 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", 709 SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n",
710 cmd, task); 710 cmd, task);
711 711
712 return EH_NOT_HANDLED; 712 return BLK_EH_NOT_HANDLED;
713} 713}
714 714
715int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) 715int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
@@ -1039,7 +1039,7 @@ void sas_task_abort(struct sas_task *task)
1039 return; 1039 return;
1040 } 1040 }
1041 1041
1042 scsi_req_abort_cmd(sc); 1042 blk_abort_request(sc->request);
1043 scsi_schedule_eh(sc->device->host); 1043 scsi_schedule_eh(sc->device->host);
1044} 1044}
1045 1045
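
sas_task_abort() and sas_ata_task_abort() show the replacement for the removed scsi_req_abort_cmd(): blk_abort_request() forces an immediate timeout on the request, which funnels into scsi_times_out() and from there into the error handler. The resulting idiom, sketched with a hypothetical lld_abort_scmd():

    /* Kick a stuck command into SCSI error handling (sketch). */
    static void lld_abort_scmd(struct scsi_cmnd *sc)
    {
            blk_abort_request(sc->request);     /* fire the timeout path now */
            scsi_schedule_eh(sc->device->host); /* and wake the EH thread */
    }
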
diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c
index 97b763378e7d..afe1de998763 100644
--- a/drivers/scsi/megaraid/megaraid_sas.c
+++ b/drivers/scsi/megaraid/megaraid_sas.c
@@ -1167,7 +1167,7 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd)
1167 * cmd has not been completed within the timeout period. 1167 * cmd has not been completed within the timeout period.
1168 */ 1168 */
1169static enum 1169static enum
1170scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) 1170blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
1171{ 1171{
1172 struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; 1172 struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
1173 struct megasas_instance *instance; 1173 struct megasas_instance *instance;
@@ -1175,7 +1175,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
1175 1175
1176 if (time_after(jiffies, scmd->jiffies_at_alloc + 1176 if (time_after(jiffies, scmd->jiffies_at_alloc +
1177 (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { 1177 (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) {
1178 return EH_NOT_HANDLED; 1178 return BLK_EH_NOT_HANDLED;
1179 } 1179 }
1180 1180
1181 instance = cmd->instance; 1181 instance = cmd->instance;
@@ -1189,7 +1189,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
1189 1189
1190 spin_unlock_irqrestore(instance->host->host_lock, flags); 1190 spin_unlock_irqrestore(instance->host->host_lock, flags);
1191 } 1191 }
1192 return EH_RESET_TIMER; 1192 return BLK_EH_RESET_TIMER;
1193} 1193}
1194 1194
1195/** 1195/**
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index c57c94c0ffd2..3b7240e40819 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -4170,8 +4170,8 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd)
4170 ** 4170 **
4171 **---------------------------------------------------- 4171 **----------------------------------------------------
4172 */ 4172 */
4173 if (np->settle_time && cmd->timeout_per_command >= HZ) { 4173 if (np->settle_time && cmd->request->timeout >= HZ) {
4174 u_long tlimit = jiffies + cmd->timeout_per_command - HZ; 4174 u_long tlimit = jiffies + cmd->request->timeout - HZ;
4175 if (time_after(np->settle_time, tlimit)) 4175 if (time_after(np->settle_time, tlimit))
4176 np->settle_time = tlimit; 4176 np->settle_time = tlimit;
4177 } 4177 }
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 37f9ba0cd798..b6cd12b2e996 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -2845,7 +2845,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
2845 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); 2845 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
2846 2846
2847 /* Set ISP command timeout. */ 2847 /* Set ISP command timeout. */
2848 pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); 2848 pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
2849 2849
2850 /* Set device target ID and LUN */ 2850 /* Set device target ID and LUN */
2851 pkt->lun = SCSI_LUN_32(cmd); 2851 pkt->lun = SCSI_LUN_32(cmd);
@@ -3114,7 +3114,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp)
3114 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); 3114 memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8));
3115 3115
3116 /* Set ISP command timeout. */ 3116 /* Set ISP command timeout. */
3117 pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); 3117 pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ);
3118 3118
3119 /* Set device target ID and LUN */ 3119 /* Set device target ID and LUN */
3120 pkt->lun = SCSI_LUN_32(cmd); 3120 pkt->lun = SCSI_LUN_32(cmd);
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 88bebb13bc52..de8279ad7d89 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -1542,7 +1542,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd)
1542 DEBUG2(printk(KERN_INFO 1542 DEBUG2(printk(KERN_INFO
1543 "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x," 1543 "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x,"
1544 "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no, 1544 "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no,
1545 cmd, jiffies, cmd->timeout_per_command / HZ, 1545 cmd, jiffies, cmd->request->timeout / HZ,
1546 ha->dpc_flags, cmd->result, cmd->allowed)); 1546 ha->dpc_flags, cmd->result, cmd->allowed));
1547 1547
1548 /* FIXME: wait for hba to go online */ 1548 /* FIXME: wait for hba to go online */
@@ -1598,7 +1598,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd)
1598 DEBUG2(printk(KERN_INFO 1598 DEBUG2(printk(KERN_INFO
1599 "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, " 1599 "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, "
1600 "to=%x,dpc_flags=%lx, status=%x allowed=%d\n", 1600 "to=%x,dpc_flags=%lx, status=%x allowed=%d\n",
1601 ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ, 1601 ha->host_no, cmd, jiffies, cmd->request->timeout / HZ,
1602 ha->dpc_flags, cmd->result, cmd->allowed)); 1602 ha->dpc_flags, cmd->result, cmd->allowed));
1603 1603
1604 stat = qla4xxx_reset_target(ha, ddb_entry); 1604 stat = qla4xxx_reset_target(ha, ddb_entry);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index ee6be596503d..dbeb86cafc0d 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -291,7 +291,6 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
291 unsigned long flags; 291 unsigned long flags;
292 292
293 cmd->device = dev; 293 cmd->device = dev;
294 init_timer(&cmd->eh_timeout);
295 INIT_LIST_HEAD(&cmd->list); 294 INIT_LIST_HEAD(&cmd->list);
296 spin_lock_irqsave(&dev->list_lock, flags); 295 spin_lock_irqsave(&dev->list_lock, flags);
297 list_add_tail(&cmd->list, &dev->cmd_list); 296 list_add_tail(&cmd->list, &dev->cmd_list);
@@ -652,14 +651,19 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
652 unsigned long timeout; 651 unsigned long timeout;
653 int rtn = 0; 652 int rtn = 0;
654 653
654 /*
655 * We will use a queued command if possible, otherwise we will
 656 * emulate the queuing and calling of the completion function ourselves.
657 */
658 atomic_inc(&cmd->device->iorequest_cnt);
659
655 /* check if the device is still usable */ 660 /* check if the device is still usable */
656 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { 661 if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
657 /* in SDEV_DEL we error all commands. DID_NO_CONNECT 662 /* in SDEV_DEL we error all commands. DID_NO_CONNECT
658 * returns an immediate error upwards, and signals 663 * returns an immediate error upwards, and signals
659 * that the device is no longer present */ 664 * that the device is no longer present */
660 cmd->result = DID_NO_CONNECT << 16; 665 cmd->result = DID_NO_CONNECT << 16;
661 atomic_inc(&cmd->device->iorequest_cnt); 666 scsi_done(cmd);
662 __scsi_done(cmd);
663 /* return 0 (because the command has been processed) */ 667 /* return 0 (because the command has been processed) */
664 goto out; 668 goto out;
665 } 669 }
@@ -672,6 +676,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
672 * future requests should not occur until the device 676 * future requests should not occur until the device
673 * transitions out of the suspend state. 677 * transitions out of the suspend state.
674 */ 678 */
679
675 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); 680 scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
676 681
677 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); 682 SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n"));
@@ -714,21 +719,9 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
714 host->resetting = 0; 719 host->resetting = 0;
715 } 720 }
716 721
717 /*
718 * AK: unlikely race here: for some reason the timer could
719 * expire before the serial number is set up below.
720 */
721 scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out);
722
723 scsi_log_send(cmd); 722 scsi_log_send(cmd);
724 723
725 /* 724 /*
726 * We will use a queued command if possible, otherwise we will
727 * emulate the queuing and calling of completion function ourselves.
728 */
729 atomic_inc(&cmd->device->iorequest_cnt);
730
731 /*
732 * Before we queue this command, check if the command 725 * Before we queue this command, check if the command
733 * length exceeds what the host adapter can handle. 726 * length exceeds what the host adapter can handle.
734 */ 727 */
@@ -744,6 +737,12 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
744 } 737 }
745 738
746 spin_lock_irqsave(host->host_lock, flags); 739 spin_lock_irqsave(host->host_lock, flags);
740 /*
741 * AK: unlikely race here: for some reason the timer could
742 * expire before the serial number is set up below.
743 *
744 * TODO: kill serial or move to blk layer
745 */
747 scsi_cmd_get_serial(host, cmd); 746 scsi_cmd_get_serial(host, cmd);
748 747
749 if (unlikely(host->shost_state == SHOST_DEL)) { 748 if (unlikely(host->shost_state == SHOST_DEL)) {
@@ -754,12 +753,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
754 } 753 }
755 spin_unlock_irqrestore(host->host_lock, flags); 754 spin_unlock_irqrestore(host->host_lock, flags);
756 if (rtn) { 755 if (rtn) {
757 if (scsi_delete_timer(cmd)) { 756 scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
758 atomic_inc(&cmd->device->iodone_cnt); 757 rtn : SCSI_MLQUEUE_HOST_BUSY);
759 scsi_queue_insert(cmd,
760 (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ?
761 rtn : SCSI_MLQUEUE_HOST_BUSY);
762 }
763 SCSI_LOG_MLQUEUE(3, 758 SCSI_LOG_MLQUEUE(3,
764 printk("queuecommand : request rejected\n")); 759 printk("queuecommand : request rejected\n"));
765 } 760 }
@@ -770,24 +765,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
770} 765}
771 766
772/** 767/**
773 * scsi_req_abort_cmd -- Request command recovery for the specified command
774 * @cmd: pointer to the SCSI command of interest
775 *
776 * This function requests that SCSI Core start recovery for the
777 * command by deleting the timer and adding the command to the eh
778 * queue. It can be called by either LLDDs or SCSI Core. LLDDs who
779 * implement their own error recovery MAY ignore the timeout event if
780 * they generated scsi_req_abort_cmd.
781 */
782void scsi_req_abort_cmd(struct scsi_cmnd *cmd)
783{
784 if (!scsi_delete_timer(cmd))
785 return;
786 scsi_times_out(cmd);
787}
788EXPORT_SYMBOL(scsi_req_abort_cmd);
789
790/**
791 * scsi_done - Enqueue the finished SCSI command into the done queue. 768 * scsi_done - Enqueue the finished SCSI command into the done queue.
792 * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives 769 * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
793 * ownership back to SCSI Core -- i.e. the LLDD has finished with it. 770 * ownership back to SCSI Core -- i.e. the LLDD has finished with it.
@@ -802,42 +779,7 @@ EXPORT_SYMBOL(scsi_req_abort_cmd);
802 */ 779 */
803static void scsi_done(struct scsi_cmnd *cmd) 780static void scsi_done(struct scsi_cmnd *cmd)
804{ 781{
805 /* 782 blk_complete_request(cmd->request);
806 * We don't have to worry about this one timing out anymore.
807 * If we are unable to remove the timer, then the command
808 * has already timed out. In which case, we have no choice but to
809 * let the timeout function run, as we have no idea where in fact
810 * that function could really be. It might be on another processor,
811 * etc, etc.
812 */
813 if (!scsi_delete_timer(cmd))
814 return;
815 __scsi_done(cmd);
816}
817
818/* Private entry to scsi_done() to complete a command when the timer
819 * isn't running --- used by scsi_times_out */
820void __scsi_done(struct scsi_cmnd *cmd)
821{
822 struct request *rq = cmd->request;
823
824 /*
825 * Set the serial numbers back to zero
826 */
827 cmd->serial_number = 0;
828
829 atomic_inc(&cmd->device->iodone_cnt);
830 if (cmd->result)
831 atomic_inc(&cmd->device->ioerr_cnt);
832
833 BUG_ON(!rq);
834
835 /*
836 * The uptodate/nbytes values don't matter, as we allow partial
837 * completes and thus will check this in the softirq callback
838 */
839 rq->completion_data = cmd;
840 blk_complete_request(rq);
841} 783}
842 784
843/* Move this to a header if it becomes more generally useful */ 785/* Move this to a header if it becomes more generally useful */
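
With timers owned by the block layer, scsi_done() collapses to a single blk_complete_request() call; the serial-number reset and the iodone/ioerr accounting that used to live in __scsi_done() move into scsi_softirq_done() (see the scsi_lib.c hunk below), and the command is recovered from rq->special rather than rq->completion_data. The resulting flow, sketched as a comment:

    /*
     * LLD -> scsi_done(cmd)
     *          -> blk_complete_request(cmd->request)   raises BLOCK_SOFTIRQ
     *               -> scsi_softirq_done(rq)           softirq context
     *                    cmd = rq->special;
     *                    accounting, scsi_decide_disposition(), retry or finish
     */
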
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 39ce3aba1dac..fecefa05cb62 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -112,69 +112,8 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
112} 112}
113 113
114/** 114/**
115 * scsi_add_timer - Start timeout timer for a single scsi command.
116 * @scmd: scsi command that is about to start running.
117 * @timeout: amount of time to allow this command to run.
118 * @complete: timeout function to call if timer isn't canceled.
119 *
120 * Notes:
121 * This should be turned into an inline function. Each scsi command
122 * has its own timer, and as it is added to the queue, we set up the
123 * timer. When the command completes, we cancel the timer.
124 */
125void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
126 void (*complete)(struct scsi_cmnd *))
127{
128
129 /*
130 * If the clock was already running for this command, then
131 * first delete the timer. The timer handling code gets rather
132 * confused if we don't do this.
133 */
134 if (scmd->eh_timeout.function)
135 del_timer(&scmd->eh_timeout);
136
137 scmd->eh_timeout.data = (unsigned long)scmd;
138 scmd->eh_timeout.expires = jiffies + timeout;
139 scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
140
141 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
142 " %d, (%p)\n", __func__,
143 scmd, timeout, complete));
144
145 add_timer(&scmd->eh_timeout);
146}
147
148/**
149 * scsi_delete_timer - Delete/cancel timer for a given function.
150 * @scmd: Cmd that we are canceling timer for
151 *
152 * Notes:
153 * This should be turned into an inline function.
154 *
155 * Return value:
156 * 1 if we were able to detach the timer. 0 if we blew it, and the
157 * timer function has already started to run.
158 */
159int scsi_delete_timer(struct scsi_cmnd *scmd)
160{
161 int rtn;
162
163 rtn = del_timer(&scmd->eh_timeout);
164
165 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
166 " rtn: %d\n", __func__,
167 scmd, rtn));
168
169 scmd->eh_timeout.data = (unsigned long)NULL;
170 scmd->eh_timeout.function = NULL;
171
172 return rtn;
173}
174
175/**
176 * scsi_times_out - Timeout function for normal scsi commands. 115 * scsi_times_out - Timeout function for normal scsi commands.
177 * @scmd: Cmd that is timing out. 116 * @req: request that is timing out.
178 * 117 *
179 * Notes: 118 * Notes:
180 * We do not need to lock this. There is the potential for a race 119 * We do not need to lock this. There is the potential for a race
@@ -182,9 +121,11 @@ int scsi_delete_timer(struct scsi_cmnd *scmd)
182 * normal completion function determines that the timer has already 121 * normal completion function determines that the timer has already
183 * fired, then it mustn't do anything. 122 * fired, then it mustn't do anything.
184 */ 123 */
185void scsi_times_out(struct scsi_cmnd *scmd) 124enum blk_eh_timer_return scsi_times_out(struct request *req)
186{ 125{
187 enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); 126 struct scsi_cmnd *scmd = req->special;
127 enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
128 enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED;
188 129
189 scsi_log_completion(scmd, TIMEOUT_ERROR); 130 scsi_log_completion(scmd, TIMEOUT_ERROR);
190 131
@@ -196,22 +137,20 @@ void scsi_times_out(struct scsi_cmnd *scmd)
196 eh_timed_out = NULL; 137 eh_timed_out = NULL;
197 138
198 if (eh_timed_out) 139 if (eh_timed_out)
199 switch (eh_timed_out(scmd)) { 140 rtn = eh_timed_out(scmd);
200 case EH_HANDLED: 141 switch (rtn) {
201 __scsi_done(scmd); 142 case BLK_EH_NOT_HANDLED:
202 return;
203 case EH_RESET_TIMER:
204 scsi_add_timer(scmd, scmd->timeout_per_command,
205 scsi_times_out);
206 return;
207 case EH_NOT_HANDLED:
208 break; 143 break;
144 default:
145 return rtn;
209 } 146 }
210 147
211 if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { 148 if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
212 scmd->result |= DID_TIME_OUT << 16; 149 scmd->result |= DID_TIME_OUT << 16;
213 __scsi_done(scmd); 150 return BLK_EH_HANDLED;
214 } 151 }
152
153 return BLK_EH_NOT_HANDLED;
215} 154}
216 155
217/** 156/**
@@ -1793,7 +1732,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
1793 1732
1794 blk_rq_init(NULL, &req); 1733 blk_rq_init(NULL, &req);
1795 scmd->request = &req; 1734 scmd->request = &req;
1796 memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
1797 1735
1798 scmd->cmnd = req.cmd; 1736 scmd->cmnd = req.cmd;
1799 1737
@@ -1804,8 +1742,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
1804 1742
1805 scmd->sc_data_direction = DMA_BIDIRECTIONAL; 1743 scmd->sc_data_direction = DMA_BIDIRECTIONAL;
1806 1744
1807 init_timer(&scmd->eh_timeout);
1808
1809 spin_lock_irqsave(shost->host_lock, flags); 1745 spin_lock_irqsave(shost->host_lock, flags);
1810 shost->tmf_in_progress = 1; 1746 shost->tmf_in_progress = 1;
1811 spin_unlock_irqrestore(shost->host_lock, flags); 1747 spin_unlock_irqrestore(shost->host_lock, flags);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 62307bd794a9..e7686500e9dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1181,7 +1181,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
1181 1181
1182 cmd->transfersize = req->data_len; 1182 cmd->transfersize = req->data_len;
1183 cmd->allowed = req->retries; 1183 cmd->allowed = req->retries;
1184 cmd->timeout_per_command = req->timeout;
1185 return BLKPREP_OK; 1184 return BLKPREP_OK;
1186} 1185}
1187EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); 1186EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
@@ -1416,17 +1415,26 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
1416 spin_unlock(shost->host_lock); 1415 spin_unlock(shost->host_lock);
1417 spin_lock(sdev->request_queue->queue_lock); 1416 spin_lock(sdev->request_queue->queue_lock);
1418 1417
1419 __scsi_done(cmd); 1418 blk_complete_request(req);
1420} 1419}
1421 1420
1422static void scsi_softirq_done(struct request *rq) 1421static void scsi_softirq_done(struct request *rq)
1423{ 1422{
1424 struct scsi_cmnd *cmd = rq->completion_data; 1423 struct scsi_cmnd *cmd = rq->special;
1425 unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command; 1424 unsigned long wait_for = (cmd->allowed + 1) * rq->timeout;
1426 int disposition; 1425 int disposition;
1427 1426
1428 INIT_LIST_HEAD(&cmd->eh_entry); 1427 INIT_LIST_HEAD(&cmd->eh_entry);
1429 1428
1429 /*
1430 * Set the serial numbers back to zero
1431 */
1432 cmd->serial_number = 0;
1433
1434 atomic_inc(&cmd->device->iodone_cnt);
1435 if (cmd->result)
1436 atomic_inc(&cmd->device->ioerr_cnt);
1437
1430 disposition = scsi_decide_disposition(cmd); 1438 disposition = scsi_decide_disposition(cmd);
1431 if (disposition != SUCCESS && 1439 if (disposition != SUCCESS &&
1432 time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { 1440 time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
@@ -1675,6 +1683,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1675 1683
1676 blk_queue_prep_rq(q, scsi_prep_fn); 1684 blk_queue_prep_rq(q, scsi_prep_fn);
1677 blk_queue_softirq_done(q, scsi_softirq_done); 1685 blk_queue_softirq_done(q, scsi_softirq_done);
1686 blk_queue_rq_timed_out(q, scsi_times_out);
1678 return q; 1687 return q;
1679} 1688}
1680 1689
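
scsi_alloc_queue() now registers the midlayer's timeout handler on every SCSI queue. The same two hooks are how any queue owner opts into the new block-layer timeout framework; a sketch with hypothetical my_* names, using the signatures introduced by this series:

    static enum blk_eh_timer_return my_timed_out(struct request *req)
    {
            return BLK_EH_NOT_HANDLED; /* let generic completion handle it */
    }

    static void my_init_queue(struct request_queue *q)
    {
            blk_queue_rq_timed_out(q, my_timed_out); /* per-request timeout callback */
            blk_queue_rq_timeout(q, 30 * HZ);        /* default per-request deadline */
    }
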
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 79f0f7511204..6cddd5dd323c 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -4,6 +4,7 @@
4#include <linux/device.h> 4#include <linux/device.h>
5 5
6struct request_queue; 6struct request_queue;
7struct request;
7struct scsi_cmnd; 8struct scsi_cmnd;
8struct scsi_device; 9struct scsi_device;
9struct scsi_host_template; 10struct scsi_host_template;
@@ -27,7 +28,6 @@ extern void scsi_exit_hosts(void);
27extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); 28extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd);
28extern int scsi_setup_command_freelist(struct Scsi_Host *shost); 29extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
29extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); 30extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
30extern void __scsi_done(struct scsi_cmnd *cmd);
31#ifdef CONFIG_SCSI_LOGGING 31#ifdef CONFIG_SCSI_LOGGING
32void scsi_log_send(struct scsi_cmnd *cmd); 32void scsi_log_send(struct scsi_cmnd *cmd);
33void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); 33void scsi_log_completion(struct scsi_cmnd *cmd, int disposition);
@@ -49,10 +49,7 @@ extern int __init scsi_init_devinfo(void);
49extern void scsi_exit_devinfo(void); 49extern void scsi_exit_devinfo(void);
50 50
51/* scsi_error.c */ 51/* scsi_error.c */
52extern void scsi_add_timer(struct scsi_cmnd *, int, 52extern enum blk_eh_timer_return scsi_times_out(struct request *req);
53 void (*)(struct scsi_cmnd *));
54extern int scsi_delete_timer(struct scsi_cmnd *);
55extern void scsi_times_out(struct scsi_cmnd *cmd);
56extern int scsi_error_handler(void *host); 53extern int scsi_error_handler(void *host);
57extern int scsi_decide_disposition(struct scsi_cmnd *cmd); 54extern int scsi_decide_disposition(struct scsi_cmnd *cmd);
58extern void scsi_eh_wakeup(struct Scsi_Host *shost); 55extern void scsi_eh_wakeup(struct Scsi_Host *shost);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index ab3c71869be5..7f618ee5ecea 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -560,12 +560,15 @@ sdev_rd_attr (vendor, "%.8s\n");
560sdev_rd_attr (model, "%.16s\n"); 560sdev_rd_attr (model, "%.16s\n");
561sdev_rd_attr (rev, "%.4s\n"); 561sdev_rd_attr (rev, "%.4s\n");
562 562
563/*
564 * TODO: can we make these symlinks to the block layer ones?
565 */
563static ssize_t 566static ssize_t
564sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) 567sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf)
565{ 568{
566 struct scsi_device *sdev; 569 struct scsi_device *sdev;
567 sdev = to_scsi_device(dev); 570 sdev = to_scsi_device(dev);
568 return snprintf (buf, 20, "%d\n", sdev->timeout / HZ); 571 return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ);
569} 572}
570 573
571static ssize_t 574static ssize_t
@@ -576,7 +579,7 @@ sdev_store_timeout (struct device *dev, struct device_attribute *attr,
576 int timeout; 579 int timeout;
577 sdev = to_scsi_device(dev); 580 sdev = to_scsi_device(dev);
578 sscanf (buf, "%d\n", &timeout); 581 sscanf (buf, "%d\n", &timeout);
579 sdev->timeout = timeout * HZ; 582 blk_queue_rq_timeout(sdev->request_queue, timeout * HZ);
580 return count; 583 return count;
581} 584}
582static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); 585static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout);
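
The per-device "timeout" sysfs attribute is now a thin wrapper around the queue timeout, so these call sites no longer touch sdev->timeout directly. A sketch of the new semantics, with hypothetical helper names:

    static void set_sdev_timeout_secs(struct scsi_device *sdev, int secs)
    {
            blk_queue_rq_timeout(sdev->request_queue, secs * HZ);
    }

    static int get_sdev_timeout_secs(struct scsi_device *sdev)
    {
            return sdev->request_queue->rq_timeout / HZ;
    }
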
diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c
index 257e097c39af..3117bb106b5d 100644
--- a/drivers/scsi/scsi_tgt_lib.c
+++ b/drivers/scsi/scsi_tgt_lib.c
@@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd,
362 int err; 362 int err;
363 363
364 dprintk("%lx %u\n", uaddr, len); 364 dprintk("%lx %u\n", uaddr, len);
365 err = blk_rq_map_user(q, rq, (void *)uaddr, len); 365 err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL);
366 if (err) { 366 if (err) {
367 /* 367 /*
368 * TODO: need to fixup sg_tablesize, max_segment_size, 368 * TODO: need to fixup sg_tablesize, max_segment_size,
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 56823fd1fb84..9168883d0dfe 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1950,15 +1950,15 @@ static int fc_vport_match(struct attribute_container *cont,
1950 * Notes: 1950 * Notes:
1951 * This routine assumes no locks are held on entry. 1951 * This routine assumes no locks are held on entry.
1952 */ 1952 */
1953static enum scsi_eh_timer_return 1953static enum blk_eh_timer_return
1954fc_timed_out(struct scsi_cmnd *scmd) 1954fc_timed_out(struct scsi_cmnd *scmd)
1955{ 1955{
1956 struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); 1956 struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device));
1957 1957
1958 if (rport->port_state == FC_PORTSTATE_BLOCKED) 1958 if (rport->port_state == FC_PORTSTATE_BLOCKED)
1959 return EH_RESET_TIMER; 1959 return BLK_EH_RESET_TIMER;
1960 1960
1961 return EH_NOT_HANDLED; 1961 return BLK_EH_NOT_HANDLED;
1962} 1962}
1963 1963
1964/* 1964/*
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e5e7d7856454..c0cf4acda7de 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -86,6 +86,12 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
86MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); 86MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
87MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); 87MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
88 88
89#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
90#define SD_MINORS 16
91#else
92#define SD_MINORS 0
93#endif
94
89static int sd_revalidate_disk(struct gendisk *); 95static int sd_revalidate_disk(struct gendisk *);
90static int sd_probe(struct device *); 96static int sd_probe(struct device *);
91static int sd_remove(struct device *); 97static int sd_remove(struct device *);
@@ -159,7 +165,7 @@ sd_store_cache_type(struct device *dev, struct device_attribute *attr,
159 sd_print_sense_hdr(sdkp, &sshdr); 165 sd_print_sense_hdr(sdkp, &sshdr);
160 return -EINVAL; 166 return -EINVAL;
161 } 167 }
162 sd_revalidate_disk(sdkp->disk); 168 revalidate_disk(sdkp->disk);
163 return count; 169 return count;
164} 170}
165 171
@@ -377,7 +383,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
377 sector_t block = rq->sector; 383 sector_t block = rq->sector;
378 sector_t threshold; 384 sector_t threshold;
379 unsigned int this_count = rq->nr_sectors; 385 unsigned int this_count = rq->nr_sectors;
380 unsigned int timeout = sdp->timeout;
381 int ret; 386 int ret;
382 387
383 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { 388 if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -578,7 +583,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
578 SCpnt->transfersize = sdp->sector_size; 583 SCpnt->transfersize = sdp->sector_size;
579 SCpnt->underflow = this_count << 9; 584 SCpnt->underflow = this_count << 9;
580 SCpnt->allowed = SD_MAX_RETRIES; 585 SCpnt->allowed = SD_MAX_RETRIES;
581 SCpnt->timeout_per_command = timeout;
582 586
583 /* 587 /*
584 * This indicates that the command is ready from our end to be 588 * This indicates that the command is ready from our end to be
@@ -910,7 +914,7 @@ static void sd_rescan(struct device *dev)
910 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); 914 struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
911 915
912 if (sdkp) { 916 if (sdkp) {
913 sd_revalidate_disk(sdkp->disk); 917 revalidate_disk(sdkp->disk);
914 scsi_disk_put(sdkp); 918 scsi_disk_put(sdkp);
915 } 919 }
916} 920}
@@ -1764,6 +1768,52 @@ static int sd_revalidate_disk(struct gendisk *disk)
1764} 1768}
1765 1769
1766/** 1770/**
1771 * sd_format_disk_name - format disk name
1772 * @prefix: name prefix - ie. "sd" for SCSI disks
1773 * @index: index of the disk to format name for
1774 * @buf: output buffer
1775 * @buflen: length of the output buffer
1776 *
 1777 * SCSI disk names start at sda. The 26th device is sdz and the
 1778 * 27th is sdaa. The last name with a two-letter suffix is sdzz,
 1779 * which is followed by sdaaa.
1780 *
 1781 * This is basically base-26 counting with one extra 'nil' entry
 1782 * at the beginning for every digit after the first, and can be
 1783 * computed like an ordinary base-26 conversion with the index
 1784 * shifted down by one after each digit is produced.
1785 *
1786 * CONTEXT:
1787 * Don't care.
1788 *
1789 * RETURNS:
1790 * 0 on success, -errno on failure.
1791 */
1792static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen)
1793{
1794 const int base = 'z' - 'a' + 1;
1795 char *begin = buf + strlen(prefix);
1796 char *end = buf + buflen;
1797 char *p;
1798 int unit;
1799
1800 p = end - 1;
1801 *p = '\0';
1802 unit = base;
1803 do {
1804 if (p == begin)
1805 return -EINVAL;
1806 *--p = 'a' + (index % unit);
1807 index = (index / unit) - 1;
1808 } while (index >= 0);
1809
1810 memmove(begin, p, end - p);
1811 memcpy(buf, prefix, strlen(prefix));
1812
1813 return 0;
1814}
1815
1816/**
1767 * sd_probe - called during driver initialization and whenever a 1817 * sd_probe - called during driver initialization and whenever a
1768 * new scsi device is attached to the system. It is called once 1818 * new scsi device is attached to the system. It is called once
1769 * for each scsi device (not just disks) present. 1819 * for each scsi device (not just disks) present.
@@ -1801,7 +1851,7 @@ static int sd_probe(struct device *dev)
1801 if (!sdkp) 1851 if (!sdkp)
1802 goto out; 1852 goto out;
1803 1853
1804 gd = alloc_disk(16); 1854 gd = alloc_disk(SD_MINORS);
1805 if (!gd) 1855 if (!gd)
1806 goto out_free; 1856 goto out_free;
1807 1857
@@ -1815,8 +1865,8 @@ static int sd_probe(struct device *dev)
1815 if (error) 1865 if (error)
1816 goto out_put; 1866 goto out_put;
1817 1867
1818 error = -EBUSY; 1868 error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN);
1819 if (index >= SD_MAX_DISKS) 1869 if (error)
1820 goto out_free_index; 1870 goto out_free_index;
1821 1871
1822 sdkp->device = sdp; 1872 sdkp->device = sdp;
@@ -1826,11 +1876,12 @@ static int sd_probe(struct device *dev)
1826 sdkp->openers = 0; 1876 sdkp->openers = 0;
1827 sdkp->previous_state = 1; 1877 sdkp->previous_state = 1;
1828 1878
1829 if (!sdp->timeout) { 1879 if (!sdp->request_queue->rq_timeout) {
1830 if (sdp->type != TYPE_MOD) 1880 if (sdp->type != TYPE_MOD)
1831 sdp->timeout = SD_TIMEOUT; 1881 blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT);
1832 else 1882 else
1833 sdp->timeout = SD_MOD_TIMEOUT; 1883 blk_queue_rq_timeout(sdp->request_queue,
1884 SD_MOD_TIMEOUT);
1834 } 1885 }
1835 1886
1836 device_initialize(&sdkp->dev); 1887 device_initialize(&sdkp->dev);
@@ -1843,24 +1894,12 @@ static int sd_probe(struct device *dev)
1843 1894
1844 get_device(&sdp->sdev_gendev); 1895 get_device(&sdp->sdev_gendev);
1845 1896
1846 gd->major = sd_major((index & 0xf0) >> 4); 1897 if (index < SD_MAX_DISKS) {
1847 gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); 1898 gd->major = sd_major((index & 0xf0) >> 4);
1848 gd->minors = 16; 1899 gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
1849 gd->fops = &sd_fops; 1900 gd->minors = SD_MINORS;
1850
1851 if (index < 26) {
1852 sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
1853 } else if (index < (26 + 1) * 26) {
1854 sprintf(gd->disk_name, "sd%c%c",
1855 'a' + index / 26 - 1,'a' + index % 26);
1856 } else {
1857 const unsigned int m1 = (index / 26 - 1) / 26 - 1;
1858 const unsigned int m2 = (index / 26 - 1) % 26;
1859 const unsigned int m3 = index % 26;
1860 sprintf(gd->disk_name, "sd%c%c%c",
1861 'a' + m1, 'a' + m2, 'a' + m3);
1862 } 1901 }
1863 1902 gd->fops = &sd_fops;
1864 gd->private_data = &sdkp->driver; 1903 gd->private_data = &sdkp->driver;
1865 gd->queue = sdkp->device->request_queue; 1904 gd->queue = sdkp->device->request_queue;
1866 1905
@@ -1869,7 +1908,7 @@ static int sd_probe(struct device *dev)
1869 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); 1908 blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
1870 1909
1871 gd->driverfs_dev = &sdp->sdev_gendev; 1910 gd->driverfs_dev = &sdp->sdev_gendev;
1872 gd->flags = GENHD_FL_DRIVERFS; 1911 gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS;
1873 if (sdp->removable) 1912 if (sdp->removable)
1874 gd->flags |= GENHD_FL_REMOVABLE; 1913 gd->flags |= GENHD_FL_REMOVABLE;
1875 1914
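
sd_format_disk_name() above replaces the open-coded one-, two- and three-letter cases with bijective base-26 counting that works for any index. A self-contained userspace demo of the same algorithm (not kernel code; compiles with any C compiler), with the expected output in the comment:

    #include <stdio.h>
    #include <string.h>

    static int format_disk_name(const char *prefix, int index,
                                char *buf, int buflen)
    {
            const int base = 'z' - 'a' + 1;
            char *begin = buf + strlen(prefix);
            char *end = buf + buflen;
            char *p = end - 1;

            *p = '\0';
            do {
                    if (p == begin)
                            return -1;              /* buffer too small */
                    *--p = 'a' + (index % base);
                    index = (index / base) - 1;     /* the -1 makes it bijective */
            } while (index >= 0);

            memmove(begin, p, end - p);             /* includes the '\0' */
            memcpy(buf, prefix, strlen(prefix));
            return 0;
    }

    int main(void)
    {
            int idx[] = { 0, 25, 26, 701, 702 };
            char name[32];
            size_t i;

            /* prints: sda sdz sdaa sdzz sdaaa */
            for (i = 0; i < sizeof(idx) / sizeof(idx[0]); i++) {
                    format_disk_name("sd", idx[i], name, sizeof(name));
                    printf("%d -> %s\n", idx[i], name);
            }
            return 0;
    }
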
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 661f9f21650a..ba9b9bbd4e73 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -47,7 +47,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */
47#include <linux/seq_file.h> 47#include <linux/seq_file.h>
48#include <linux/blkdev.h> 48#include <linux/blkdev.h>
49#include <linux/delay.h> 49#include <linux/delay.h>
50#include <linux/scatterlist.h>
51#include <linux/blktrace_api.h> 50#include <linux/blktrace_api.h>
52#include <linux/smp_lock.h> 51#include <linux/smp_lock.h>
53 52
@@ -69,7 +68,6 @@ static void sg_proc_cleanup(void);
69#endif 68#endif
70 69
71#define SG_ALLOW_DIO_DEF 0 70#define SG_ALLOW_DIO_DEF 0
72#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */
73 71
74#define SG_MAX_DEVS 32768 72#define SG_MAX_DEVS 32768
75 73
@@ -118,8 +116,8 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */
118 unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ 116 unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */
119 unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ 117 unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */
120 unsigned bufflen; /* Size of (aggregate) data buffer */ 118 unsigned bufflen; /* Size of (aggregate) data buffer */
121 unsigned b_malloc_len; /* actual len malloc'ed in buffer */ 119 struct page **pages;
122 struct scatterlist *buffer;/* scatter list */ 120 int page_order;
123 char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ 121 char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */
124 unsigned char cmd_opcode; /* first byte of command */ 122 unsigned char cmd_opcode; /* first byte of command */
125} Sg_scatter_hold; 123} Sg_scatter_hold;
@@ -137,6 +135,8 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */
137 char orphan; /* 1 -> drop on sight, 0 -> normal */ 135 char orphan; /* 1 -> drop on sight, 0 -> normal */
138 char sg_io_owned; /* 1 -> packet belongs to SG_IO */ 136 char sg_io_owned; /* 1 -> packet belongs to SG_IO */
139 volatile char done; /* 0->before bh, 1->before read, 2->read */ 137 volatile char done; /* 0->before bh, 1->before read, 2->read */
138 struct request *rq;
139 struct bio *bio;
140} Sg_request; 140} Sg_request;
141 141
142typedef struct sg_fd { /* holds the state of a file descriptor */ 142typedef struct sg_fd { /* holds the state of a file descriptor */
@@ -175,8 +175,8 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
175 175
176static int sg_fasync(int fd, struct file *filp, int mode); 176static int sg_fasync(int fd, struct file *filp, int mode);
177/* tasklet or soft irq callback */ 177/* tasklet or soft irq callback */
178static void sg_cmd_done(void *data, char *sense, int result, int resid); 178static void sg_rq_end_io(struct request *rq, int uptodate);
179static int sg_start_req(Sg_request * srp); 179static int sg_start_req(Sg_request *srp, unsigned char *cmd);
180static void sg_finish_rem_req(Sg_request * srp); 180static void sg_finish_rem_req(Sg_request * srp);
181static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); 181static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
182static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, 182static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp,
@@ -188,17 +188,11 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file,
188 int read_only, Sg_request **o_srp); 188 int read_only, Sg_request **o_srp);
189static int sg_common_write(Sg_fd * sfp, Sg_request * srp, 189static int sg_common_write(Sg_fd * sfp, Sg_request * srp,
190 unsigned char *cmnd, int timeout, int blocking); 190 unsigned char *cmnd, int timeout, int blocking);
191static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind,
192 int wr_xf, int *countp, unsigned char __user **up);
193static int sg_write_xfer(Sg_request * srp);
194static int sg_read_xfer(Sg_request * srp);
195static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); 191static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer);
196static void sg_remove_scat(Sg_scatter_hold * schp); 192static void sg_remove_scat(Sg_scatter_hold * schp);
197static void sg_build_reserve(Sg_fd * sfp, int req_size); 193static void sg_build_reserve(Sg_fd * sfp, int req_size);
198static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); 194static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
199static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); 195static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
200static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp);
201static void sg_page_free(struct page *page, int size);
202static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); 196static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev);
203static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); 197static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
204static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); 198static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp);
@@ -206,7 +200,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
206static Sg_request *sg_add_request(Sg_fd * sfp); 200static Sg_request *sg_add_request(Sg_fd * sfp);
207static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); 201static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
208static int sg_res_in_use(Sg_fd * sfp); 202static int sg_res_in_use(Sg_fd * sfp);
209static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len);
210static Sg_device *sg_get_dev(int dev); 203static Sg_device *sg_get_dev(int dev);
211#ifdef CONFIG_SCSI_PROC_FS 204#ifdef CONFIG_SCSI_PROC_FS
212static int sg_last_dev(void); 205static int sg_last_dev(void);
@@ -529,8 +522,7 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp)
529 err = -EFAULT; 522 err = -EFAULT;
530 goto err_out; 523 goto err_out;
531 } 524 }
532 err = sg_read_xfer(srp); 525err_out:
533 err_out:
534 sg_finish_rem_req(srp); 526 sg_finish_rem_req(srp);
535 return (0 == err) ? count : err; 527 return (0 == err) ? count : err;
536} 528}
@@ -612,7 +604,10 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos)
612 else 604 else
613 hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; 605 hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE;
614 hp->dxfer_len = mxsize; 606 hp->dxfer_len = mxsize;
615 hp->dxferp = (char __user *)buf + cmd_size; 607 if (hp->dxfer_direction == SG_DXFER_TO_DEV)
608 hp->dxferp = (char __user *)buf + cmd_size;
609 else
610 hp->dxferp = NULL;
616 hp->sbp = NULL; 611 hp->sbp = NULL;
617 hp->timeout = old_hdr.reply_len; /* structure abuse ... */ 612 hp->timeout = old_hdr.reply_len; /* structure abuse ... */
618 hp->flags = input_size; /* structure abuse ... */ 613 hp->flags = input_size; /* structure abuse ... */
@@ -732,16 +727,12 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
732 SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", 727 SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n",
733 (int) cmnd[0], (int) hp->cmd_len)); 728 (int) cmnd[0], (int) hp->cmd_len));
734 729
735 if ((k = sg_start_req(srp))) { 730 k = sg_start_req(srp, cmnd);
731 if (k) {
736 SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k)); 732 SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k));
737 sg_finish_rem_req(srp); 733 sg_finish_rem_req(srp);
738 return k; /* probably out of space --> ENOMEM */ 734 return k; /* probably out of space --> ENOMEM */
739 } 735 }
740 if ((k = sg_write_xfer(srp))) {
741 SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n"));
742 sg_finish_rem_req(srp);
743 return k;
744 }
745 if (sdp->detached) { 736 if (sdp->detached) {
746 sg_finish_rem_req(srp); 737 sg_finish_rem_req(srp);
747 return -ENODEV; 738 return -ENODEV;
@@ -763,20 +754,11 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
763 break; 754 break;
764 } 755 }
765 hp->duration = jiffies_to_msecs(jiffies); 756 hp->duration = jiffies_to_msecs(jiffies);
766/* Now send everything off to mid-level. The next time we hear about this 757
767 packet is when sg_cmd_done() is called (i.e. a callback). */ 758 srp->rq->timeout = timeout;
768 if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, 759 blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk,
769 hp->dxfer_len, srp->data.k_use_sg, timeout, 760 srp->rq, 1, sg_rq_end_io);
770 SG_DEFAULT_RETRIES, srp, sg_cmd_done, 761 return 0;
771 GFP_ATOMIC)) {
772 SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n"));
773 /*
774 * most likely out of mem, but could also be a bad map
775 */
776 sg_finish_rem_req(srp);
777 return -ENOMEM;
778 } else
779 return 0;
780} 762}
781 763
782static int 764static int
@@ -1192,8 +1174,7 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1192 Sg_fd *sfp; 1174 Sg_fd *sfp;
1193 unsigned long offset, len, sa; 1175 unsigned long offset, len, sa;
1194 Sg_scatter_hold *rsv_schp; 1176 Sg_scatter_hold *rsv_schp;
1195 struct scatterlist *sg; 1177 int k, length;
1196 int k;
1197 1178
1198 if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) 1179 if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data)))
1199 return VM_FAULT_SIGBUS; 1180 return VM_FAULT_SIGBUS;
@@ -1203,15 +1184,14 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1203 return VM_FAULT_SIGBUS; 1184 return VM_FAULT_SIGBUS;
1204 SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n", 1185 SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n",
1205 offset, rsv_schp->k_use_sg)); 1186 offset, rsv_schp->k_use_sg));
1206 sg = rsv_schp->buffer;
1207 sa = vma->vm_start; 1187 sa = vma->vm_start;
1208 for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); 1188 length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
1209 ++k, sg = sg_next(sg)) { 1189 for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
1210 len = vma->vm_end - sa; 1190 len = vma->vm_end - sa;
1211 len = (len < sg->length) ? len : sg->length; 1191 len = (len < length) ? len : length;
1212 if (offset < len) { 1192 if (offset < len) {
1213 struct page *page; 1193 struct page *page = nth_page(rsv_schp->pages[k],
1214 page = virt_to_page(page_address(sg_page(sg)) + offset); 1194 offset >> PAGE_SHIFT);
1215 get_page(page); /* increment page count */ 1195 get_page(page); /* increment page count */
1216 vmf->page = page; 1196 vmf->page = page;
1217 return 0; /* success */ 1197 return 0; /* success */
@@ -1233,8 +1213,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
1233 Sg_fd *sfp; 1213 Sg_fd *sfp;
1234 unsigned long req_sz, len, sa; 1214 unsigned long req_sz, len, sa;
1235 Sg_scatter_hold *rsv_schp; 1215 Sg_scatter_hold *rsv_schp;
1236 int k; 1216 int k, length;
1237 struct scatterlist *sg;
1238 1217
1239 if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) 1218 if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data)))
1240 return -ENXIO; 1219 return -ENXIO;
@@ -1248,11 +1227,10 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
1248 return -ENOMEM; /* cannot map more than reserved buffer */ 1227 return -ENOMEM; /* cannot map more than reserved buffer */
1249 1228
1250 sa = vma->vm_start; 1229 sa = vma->vm_start;
1251 sg = rsv_schp->buffer; 1230 length = 1 << (PAGE_SHIFT + rsv_schp->page_order);
1252 for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); 1231 for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) {
1253 ++k, sg = sg_next(sg)) {
1254 len = vma->vm_end - sa; 1232 len = vma->vm_end - sa;
1255 len = (len < sg->length) ? len : sg->length; 1233 len = (len < length) ? len : length;
1256 sa += len; 1234 sa += len;
1257 } 1235 }
1258 1236
@@ -1263,16 +1241,19 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
1263 return 0; 1241 return 0;
1264} 1242}
1265 1243
1266/* This function is a "bottom half" handler that is called by the 1244/*
1267 * mid level when a command is completed (or has failed). */ 1245 * This function is a "bottom half" handler that is called by the mid
1268static void 1246 * level when a command is completed (or has failed).
1269sg_cmd_done(void *data, char *sense, int result, int resid) 1247 */
1248static void sg_rq_end_io(struct request *rq, int uptodate)
1270{ 1249{
1271 Sg_request *srp = data; 1250 struct sg_request *srp = rq->end_io_data;
1272 Sg_device *sdp = NULL; 1251 Sg_device *sdp = NULL;
1273 Sg_fd *sfp; 1252 Sg_fd *sfp;
1274 unsigned long iflags; 1253 unsigned long iflags;
1275 unsigned int ms; 1254 unsigned int ms;
1255 char *sense;
1256 int result, resid;
1276 1257
1277 if (NULL == srp) { 1258 if (NULL == srp) {
1278 printk(KERN_ERR "sg_cmd_done: NULL request\n"); 1259 printk(KERN_ERR "sg_cmd_done: NULL request\n");
@@ -1286,6 +1267,9 @@ sg_cmd_done(void *data, char *sense, int result, int resid)
1286 return; 1267 return;
1287 } 1268 }
1288 1269
1270 sense = rq->sense;
1271 result = rq->errors;
1272 resid = rq->data_len;
1289 1273
1290 SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", 1274 SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n",
1291 sdp->disk->disk_name, srp->header.pack_id, result)); 1275 sdp->disk->disk_name, srp->header.pack_id, result));
@@ -1296,7 +1280,6 @@ sg_cmd_done(void *data, char *sense, int result, int resid)
1296 if (0 != result) { 1280 if (0 != result) {
1297 struct scsi_sense_hdr sshdr; 1281 struct scsi_sense_hdr sshdr;
1298 1282
1299 memcpy(srp->sense_b, sense, sizeof (srp->sense_b));
1300 srp->header.status = 0xff & result; 1283 srp->header.status = 0xff & result;
1301 srp->header.masked_status = status_byte(result); 1284 srp->header.masked_status = status_byte(result);
1302 srp->header.msg_status = msg_byte(result); 1285 srp->header.msg_status = msg_byte(result);
@@ -1634,37 +1617,79 @@ exit_sg(void)
1634 idr_destroy(&sg_index_idr); 1617 idr_destroy(&sg_index_idr);
1635} 1618}
1636 1619
1637static int 1620static int sg_start_req(Sg_request *srp, unsigned char *cmd)
1638sg_start_req(Sg_request * srp)
1639{ 1621{
1640 int res; 1622 int res;
1623 struct request *rq;
1641 Sg_fd *sfp = srp->parentfp; 1624 Sg_fd *sfp = srp->parentfp;
1642 sg_io_hdr_t *hp = &srp->header; 1625 sg_io_hdr_t *hp = &srp->header;
1643 int dxfer_len = (int) hp->dxfer_len; 1626 int dxfer_len = (int) hp->dxfer_len;
1644 int dxfer_dir = hp->dxfer_direction; 1627 int dxfer_dir = hp->dxfer_direction;
1628 unsigned int iov_count = hp->iovec_count;
1645 Sg_scatter_hold *req_schp = &srp->data; 1629 Sg_scatter_hold *req_schp = &srp->data;
1646 Sg_scatter_hold *rsv_schp = &sfp->reserve; 1630 Sg_scatter_hold *rsv_schp = &sfp->reserve;
1631 struct request_queue *q = sfp->parentdp->device->request_queue;
1632 struct rq_map_data *md, map_data;
1633 int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ;
1634
1635 SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n",
1636 dxfer_len));
1637
1638 rq = blk_get_request(q, rw, GFP_ATOMIC);
1639 if (!rq)
1640 return -ENOMEM;
1641
1642 memcpy(rq->cmd, cmd, hp->cmd_len);
1643
1644 rq->cmd_len = hp->cmd_len;
1645 rq->cmd_type = REQ_TYPE_BLOCK_PC;
1646
1647 srp->rq = rq;
1648 rq->end_io_data = srp;
1649 rq->sense = srp->sense_b;
1650 rq->retries = SG_DEFAULT_RETRIES;
1647 1651
1648 SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len));
1649 if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) 1652 if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE))
1650 return 0; 1653 return 0;
1651 if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && 1654
1652 (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && 1655 if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO &&
1653 (!sfp->parentdp->device->host->unchecked_isa_dma)) { 1656 dxfer_dir != SG_DXFER_UNKNOWN && !iov_count &&
1654 res = sg_build_direct(srp, sfp, dxfer_len); 1657 !sfp->parentdp->device->host->unchecked_isa_dma &&
1655 if (res <= 0) /* -ve -> error, 0 -> done, 1 -> try indirect */ 1658 blk_rq_aligned(q, hp->dxferp, dxfer_len))
1656 return res; 1659 md = NULL;
1657 } 1660 else
1658 if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) 1661 md = &map_data;
1659 sg_link_reserve(sfp, srp, dxfer_len); 1662
1660 else { 1663 if (md) {
1661 res = sg_build_indirect(req_schp, sfp, dxfer_len); 1664 if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen)
1662 if (res) { 1665 sg_link_reserve(sfp, srp, dxfer_len);
1663 sg_remove_scat(req_schp); 1666 else {
1664 return res; 1667 res = sg_build_indirect(req_schp, sfp, dxfer_len);
1668 if (res)
1669 return res;
1665 } 1670 }
1671
1672 md->pages = req_schp->pages;
1673 md->page_order = req_schp->page_order;
1674 md->nr_entries = req_schp->k_use_sg;
1666 } 1675 }
1667 return 0; 1676
1677 if (iov_count)
1678 res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count,
1679 hp->dxfer_len, GFP_ATOMIC);
1680 else
1681 res = blk_rq_map_user(q, rq, md, hp->dxferp,
1682 hp->dxfer_len, GFP_ATOMIC);
1683
1684 if (!res) {
1685 srp->bio = rq->bio;
1686
1687 if (!md) {
1688 req_schp->dio_in_use = 1;
1689 hp->info |= SG_INFO_DIRECT_IO;
1690 }
1691 }
1692 return res;
1668} 1693}
1669 1694
1670static void 1695static void
@@ -1678,186 +1703,37 @@ sg_finish_rem_req(Sg_request * srp)
1678 sg_unlink_reserve(sfp, srp); 1703 sg_unlink_reserve(sfp, srp);
1679 else 1704 else
1680 sg_remove_scat(req_schp); 1705 sg_remove_scat(req_schp);
1706
1707 if (srp->rq) {
1708 if (srp->bio)
1709 blk_rq_unmap_user(srp->bio);
1710
1711 blk_put_request(srp->rq);
1712 }
1713
1681 sg_remove_request(sfp, srp); 1714 sg_remove_request(sfp, srp);
1682} 1715}
1683 1716
1684static int 1717static int
1685sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) 1718sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize)
1686{ 1719{
1687 int sg_bufflen = tablesize * sizeof(struct scatterlist); 1720 int sg_bufflen = tablesize * sizeof(struct page *);
1688 gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; 1721 gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
1689 1722
1690 /* 1723 schp->pages = kzalloc(sg_bufflen, gfp_flags);
1691 * TODO: test without low_dma, we should not need it since 1724 if (!schp->pages)
1692 * the block layer will bounce the buffer for us
1693 *
1694 * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list.
1695 */
1696 if (sfp->low_dma)
1697 gfp_flags |= GFP_DMA;
1698 schp->buffer = kzalloc(sg_bufflen, gfp_flags);
1699 if (!schp->buffer)
1700 return -ENOMEM; 1725 return -ENOMEM;
1701 sg_init_table(schp->buffer, tablesize);
1702 schp->sglist_len = sg_bufflen; 1726 schp->sglist_len = sg_bufflen;
1703 return tablesize; /* number of scat_gath elements allocated */ 1727 return tablesize; /* number of scat_gath elements allocated */
1704} 1728}
1705 1729
1706#ifdef SG_ALLOW_DIO_CODE
1707/* vvvvvvvv following code borrowed from st driver's direct IO vvvvvvvvv */
1708 /* TODO: hopefully we can use the generic block layer code */
1709
1710/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if
1711 - mapping of all pages not successful
1712 (i.e., either completely successful or fails)
1713*/
1714static int
1715st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages,
1716 unsigned long uaddr, size_t count, int rw)
1717{
1718 unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT;
1719 unsigned long start = uaddr >> PAGE_SHIFT;
1720 const int nr_pages = end - start;
1721 int res, i, j;
1722 struct page **pages;
1723
1724 /* User attempted Overflow! */
1725 if ((uaddr + count) < uaddr)
1726 return -EINVAL;
1727
1728 /* Too big */
1729 if (nr_pages > max_pages)
1730 return -ENOMEM;
1731
1732 /* Hmm? */
1733 if (count == 0)
1734 return 0;
1735
1736 if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL)
1737 return -ENOMEM;
1738
1739 /* Try to fault in all of the necessary pages */
1740 down_read(&current->mm->mmap_sem);
1741 /* rw==READ means read from drive, write into memory area */
1742 res = get_user_pages(
1743 current,
1744 current->mm,
1745 uaddr,
1746 nr_pages,
1747 rw == READ,
1748 0, /* don't force */
1749 pages,
1750 NULL);
1751 up_read(&current->mm->mmap_sem);
1752
1753 /* Errors and no page mapped should return here */
1754 if (res < nr_pages)
1755 goto out_unmap;
1756
1757 for (i=0; i < nr_pages; i++) {
1758 /* FIXME: flush superfluous for rw==READ,
1759 * probably wrong function for rw==WRITE
1760 */
1761 flush_dcache_page(pages[i]);
1762 /* ?? Is locking needed? I don't think so */
1763 /* if (!trylock_page(pages[i]))
1764 goto out_unlock; */
1765 }
1766
1767 sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK);
1768 if (nr_pages > 1) {
1769 sgl[0].length = PAGE_SIZE - sgl[0].offset;
1770 count -= sgl[0].length;
1771 for (i=1; i < nr_pages ; i++)
1772 sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0);
1773 }
1774 else {
1775 sgl[0].length = count;
1776 }
1777
1778 kfree(pages);
1779 return nr_pages;
1780
1781 out_unmap:
1782 if (res > 0) {
1783 for (j=0; j < res; j++)
1784 page_cache_release(pages[j]);
1785 res = 0;
1786 }
1787 kfree(pages);
1788 return res;
1789}
1790
1791
1792/* And unmap them... */
1793static int
1794st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
1795 int dirtied)
1796{
1797 int i;
1798
1799 for (i=0; i < nr_pages; i++) {
1800 struct page *page = sg_page(&sgl[i]);
1801
1802 if (dirtied)
1803 SetPageDirty(page);
1804 /* unlock_page(page); */
1805 /* FIXME: cache flush missing for rw==READ
1806 * FIXME: call the correct reference counting function
1807 */
1808 page_cache_release(page);
1809 }
1810
1811 return 0;
1812}
1813
1814/* ^^^^^^^^ above code borrowed from st driver's direct IO ^^^^^^^^^ */
1815#endif
1816
1817
1818/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */
1819static int
1820sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len)
1821{
1822#ifdef SG_ALLOW_DIO_CODE
1823 sg_io_hdr_t *hp = &srp->header;
1824 Sg_scatter_hold *schp = &srp->data;
1825 int sg_tablesize = sfp->parentdp->sg_tablesize;
1826 int mx_sc_elems, res;
1827 struct scsi_device *sdev = sfp->parentdp->device;
1828
1829 if (((unsigned long)hp->dxferp &
1830 queue_dma_alignment(sdev->request_queue)) != 0)
1831 return 1;
1832
1833 mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize);
1834 if (mx_sc_elems <= 0) {
1835 return 1;
1836 }
1837 res = st_map_user_pages(schp->buffer, mx_sc_elems,
1838 (unsigned long)hp->dxferp, dxfer_len,
1839 (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0);
1840 if (res <= 0) {
1841 sg_remove_scat(schp);
1842 return 1;
1843 }
1844 schp->k_use_sg = res;
1845 schp->dio_in_use = 1;
1846 hp->info |= SG_INFO_DIRECT_IO;
1847 return 0;
1848#else
1849 return 1;
1850#endif
1851}
1852
1853static int 1730static int
1854sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) 1731sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
1855{ 1732{
1856 struct scatterlist *sg; 1733 int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems;
1857 int ret_sz = 0, k, rem_sz, num, mx_sc_elems;
1858 int sg_tablesize = sfp->parentdp->sg_tablesize; 1734 int sg_tablesize = sfp->parentdp->sg_tablesize;
1859 int blk_size = buff_size; 1735 int blk_size = buff_size, order;
1860 struct page *p = NULL; 1736 gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
1861 1737
1862 if (blk_size < 0) 1738 if (blk_size < 0)
1863 return -EFAULT; 1739 return -EFAULT;
@@ -1881,15 +1757,26 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
1881 } else 1757 } else
1882 scatter_elem_sz_prev = num; 1758 scatter_elem_sz_prev = num;
1883 } 1759 }
1884 for (k = 0, sg = schp->buffer, rem_sz = blk_size; 1760
1885 (rem_sz > 0) && (k < mx_sc_elems); 1761 if (sfp->low_dma)
1886 ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { 1762 gfp_mask |= GFP_DMA;
1887 1763
1764 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
1765 gfp_mask |= __GFP_ZERO;
1766
1767 order = get_order(num);
1768retry:
1769 ret_sz = 1 << (PAGE_SHIFT + order);
1770
1771 for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems;
1772 k++, rem_sz -= ret_sz) {
1773
1888 num = (rem_sz > scatter_elem_sz_prev) ? 1774 num = (rem_sz > scatter_elem_sz_prev) ?
1889 scatter_elem_sz_prev : rem_sz; 1775 scatter_elem_sz_prev : rem_sz;
1890 p = sg_page_malloc(num, sfp->low_dma, &ret_sz); 1776
1891 if (!p) 1777 schp->pages[k] = alloc_pages(gfp_mask, order);
1892 return -ENOMEM; 1778 if (!schp->pages[k])
1779 goto out;
1893 1780
1894 if (num == scatter_elem_sz_prev) { 1781 if (num == scatter_elem_sz_prev) {
1895 if (unlikely(ret_sz > scatter_elem_sz_prev)) { 1782 if (unlikely(ret_sz > scatter_elem_sz_prev)) {
@@ -1897,12 +1784,12 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
1897 scatter_elem_sz_prev = ret_sz; 1784 scatter_elem_sz_prev = ret_sz;
1898 } 1785 }
1899 } 1786 }
1900 sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0);
1901 1787
1902 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " 1788 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, "
1903 "ret_sz=%d\n", k, num, ret_sz)); 1789 "ret_sz=%d\n", k, num, ret_sz));
1904 } /* end of for loop */ 1790 } /* end of for loop */
1905 1791
1792 schp->page_order = order;
1906 schp->k_use_sg = k; 1793 schp->k_use_sg = k;
1907 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, " 1794 SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, "
1908 "rem_sz=%d\n", k, rem_sz)); 1795 "rem_sz=%d\n", k, rem_sz));
@@ -1910,223 +1797,42 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size)
1910 schp->bufflen = blk_size; 1797 schp->bufflen = blk_size;
1911 if (rem_sz > 0) /* must have failed */ 1798 if (rem_sz > 0) /* must have failed */
1912 return -ENOMEM; 1799 return -ENOMEM;
1913
1914 return 0; 1800 return 0;
1915} 1801out:
1916 1802 for (i = 0; i < k; i++)
1917static int 1803 __free_pages(schp->pages[i], order);
1918sg_write_xfer(Sg_request * srp)
1919{
1920 sg_io_hdr_t *hp = &srp->header;
1921 Sg_scatter_hold *schp = &srp->data;
1922 struct scatterlist *sg = schp->buffer;
1923 int num_xfer = 0;
1924 int j, k, onum, usglen, ksglen, res;
1925 int iovec_count = (int) hp->iovec_count;
1926 int dxfer_dir = hp->dxfer_direction;
1927 unsigned char *p;
1928 unsigned char __user *up;
1929 int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
1930
1931 if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) ||
1932 (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
1933 num_xfer = (int) (new_interface ? hp->dxfer_len : hp->flags);
1934 if (schp->bufflen < num_xfer)
1935 num_xfer = schp->bufflen;
1936 }
1937 if ((num_xfer <= 0) || (schp->dio_in_use) ||
1938 (new_interface
1939 && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
1940 return 0;
1941
1942 SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
1943 num_xfer, iovec_count, schp->k_use_sg));
1944 if (iovec_count) {
1945 onum = iovec_count;
1946 if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
1947 return -EFAULT;
1948 } else
1949 onum = 1;
1950
1951 ksglen = sg->length;
1952 p = page_address(sg_page(sg));
1953 for (j = 0, k = 0; j < onum; ++j) {
1954 res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up);
1955 if (res)
1956 return res;
1957
1958 for (; p; sg = sg_next(sg), ksglen = sg->length,
1959 p = page_address(sg_page(sg))) {
1960 if (usglen <= 0)
1961 break;
1962 if (ksglen > usglen) {
1963 if (usglen >= num_xfer) {
1964 if (__copy_from_user(p, up, num_xfer))
1965 return -EFAULT;
1966 return 0;
1967 }
1968 if (__copy_from_user(p, up, usglen))
1969 return -EFAULT;
1970 p += usglen;
1971 ksglen -= usglen;
1972 break;
1973 } else {
1974 if (ksglen >= num_xfer) {
1975 if (__copy_from_user(p, up, num_xfer))
1976 return -EFAULT;
1977 return 0;
1978 }
1979 if (__copy_from_user(p, up, ksglen))
1980 return -EFAULT;
1981 up += ksglen;
1982 usglen -= ksglen;
1983 }
1984 ++k;
1985 if (k >= schp->k_use_sg)
1986 return 0;
1987 }
1988 }
1989
1990 return 0;
1991}
1992 1804
1993static int 1805 if (--order >= 0)
1994sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, 1806 goto retry;
1995 int wr_xf, int *countp, unsigned char __user **up)
1996{
1997 int num_xfer = (int) hp->dxfer_len;
1998 unsigned char __user *p = hp->dxferp;
1999 int count;
2000 1807
2001 if (0 == sg_num) { 1808 return -ENOMEM;
2002 if (wr_xf && ('\0' == hp->interface_id))
2003 count = (int) hp->flags; /* holds "old" input_size */
2004 else
2005 count = num_xfer;
2006 } else {
2007 sg_iovec_t iovec;
2008 if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC))
2009 return -EFAULT;
2010 p = iovec.iov_base;
2011 count = (int) iovec.iov_len;
2012 }
2013 if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count))
2014 return -EFAULT;
2015 if (up)
2016 *up = p;
2017 if (countp)
2018 *countp = count;
2019 return 0;
2020} 1809}
2021 1810
2022static void 1811static void
2023sg_remove_scat(Sg_scatter_hold * schp) 1812sg_remove_scat(Sg_scatter_hold * schp)
2024{ 1813{
2025 SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); 1814 SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg));
2026 if (schp->buffer && (schp->sglist_len > 0)) { 1815 if (schp->pages && schp->sglist_len > 0) {
2027 struct scatterlist *sg = schp->buffer; 1816 if (!schp->dio_in_use) {
2028
2029 if (schp->dio_in_use) {
2030#ifdef SG_ALLOW_DIO_CODE
2031 st_unmap_user_pages(sg, schp->k_use_sg, TRUE);
2032#endif
2033 } else {
2034 int k; 1817 int k;
2035 1818
2036 for (k = 0; (k < schp->k_use_sg) && sg_page(sg); 1819 for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
2037 ++k, sg = sg_next(sg)) {
2038 SCSI_LOG_TIMEOUT(5, printk( 1820 SCSI_LOG_TIMEOUT(5, printk(
2039 "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", 1821 "sg_remove_scat: k=%d, pg=0x%p\n",
2040 k, sg_page(sg), sg->length)); 1822 k, schp->pages[k]));
2041 sg_page_free(sg_page(sg), sg->length); 1823 __free_pages(schp->pages[k], schp->page_order);
2042 } 1824 }
2043 }
2044 kfree(schp->buffer);
2045 }
2046 memset(schp, 0, sizeof (*schp));
2047}
2048 1825
2049static int 1826 kfree(schp->pages);
2050sg_read_xfer(Sg_request * srp)
2051{
2052 sg_io_hdr_t *hp = &srp->header;
2053 Sg_scatter_hold *schp = &srp->data;
2054 struct scatterlist *sg = schp->buffer;
2055 int num_xfer = 0;
2056 int j, k, onum, usglen, ksglen, res;
2057 int iovec_count = (int) hp->iovec_count;
2058 int dxfer_dir = hp->dxfer_direction;
2059 unsigned char *p;
2060 unsigned char __user *up;
2061 int new_interface = ('\0' == hp->interface_id) ? 0 : 1;
2062
2063 if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir)
2064 || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) {
2065 num_xfer = hp->dxfer_len;
2066 if (schp->bufflen < num_xfer)
2067 num_xfer = schp->bufflen;
2068 }
2069 if ((num_xfer <= 0) || (schp->dio_in_use) ||
2070 (new_interface
2071 && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags)))
2072 return 0;
2073
2074 SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n",
2075 num_xfer, iovec_count, schp->k_use_sg));
2076 if (iovec_count) {
2077 onum = iovec_count;
2078 if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum))
2079 return -EFAULT;
2080 } else
2081 onum = 1;
2082
2083 p = page_address(sg_page(sg));
2084 ksglen = sg->length;
2085 for (j = 0, k = 0; j < onum; ++j) {
2086 res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up);
2087 if (res)
2088 return res;
2089
2090 for (; p; sg = sg_next(sg), ksglen = sg->length,
2091 p = page_address(sg_page(sg))) {
2092 if (usglen <= 0)
2093 break;
2094 if (ksglen > usglen) {
2095 if (usglen >= num_xfer) {
2096 if (__copy_to_user(up, p, num_xfer))
2097 return -EFAULT;
2098 return 0;
2099 }
2100 if (__copy_to_user(up, p, usglen))
2101 return -EFAULT;
2102 p += usglen;
2103 ksglen -= usglen;
2104 break;
2105 } else {
2106 if (ksglen >= num_xfer) {
2107 if (__copy_to_user(up, p, num_xfer))
2108 return -EFAULT;
2109 return 0;
2110 }
2111 if (__copy_to_user(up, p, ksglen))
2112 return -EFAULT;
2113 up += ksglen;
2114 usglen -= ksglen;
2115 }
2116 ++k;
2117 if (k >= schp->k_use_sg)
2118 return 0;
2119 } 1827 }
2120 } 1828 }
2121 1829 memset(schp, 0, sizeof (*schp));
2122 return 0;
2123} 1830}
2124 1831
2125static int 1832static int
2126sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) 1833sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
2127{ 1834{
2128 Sg_scatter_hold *schp = &srp->data; 1835 Sg_scatter_hold *schp = &srp->data;
2129 struct scatterlist *sg = schp->buffer;
2130 int k, num; 1836 int k, num;
2131 1837
2132 SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n", 1838 SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n",
@@ -2134,15 +1840,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer)
2134 if ((!outp) || (num_read_xfer <= 0)) 1840 if ((!outp) || (num_read_xfer <= 0))
2135 return 0; 1841 return 0;
2136 1842
2137 for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { 1843 num = 1 << (PAGE_SHIFT + schp->page_order);
2138 num = sg->length; 1844 for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) {
2139 if (num > num_read_xfer) { 1845 if (num > num_read_xfer) {
2140 if (__copy_to_user(outp, page_address(sg_page(sg)), 1846 if (__copy_to_user(outp, page_address(schp->pages[k]),
2141 num_read_xfer)) 1847 num_read_xfer))
2142 return -EFAULT; 1848 return -EFAULT;
2143 break; 1849 break;
2144 } else { 1850 } else {
2145 if (__copy_to_user(outp, page_address(sg_page(sg)), 1851 if (__copy_to_user(outp, page_address(schp->pages[k]),
2146 num)) 1852 num))
2147 return -EFAULT; 1853 return -EFAULT;
2148 num_read_xfer -= num; 1854 num_read_xfer -= num;
@@ -2177,24 +1883,21 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size)
2177{ 1883{
2178 Sg_scatter_hold *req_schp = &srp->data; 1884 Sg_scatter_hold *req_schp = &srp->data;
2179 Sg_scatter_hold *rsv_schp = &sfp->reserve; 1885 Sg_scatter_hold *rsv_schp = &sfp->reserve;
2180 struct scatterlist *sg = rsv_schp->buffer;
2181 int k, num, rem; 1886 int k, num, rem;
2182 1887
2183 srp->res_used = 1; 1888 srp->res_used = 1;
2184 SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); 1889 SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size));
2185 rem = size; 1890 rem = size;
2186 1891
2187 for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { 1892 num = 1 << (PAGE_SHIFT + rsv_schp->page_order);
2188 num = sg->length; 1893 for (k = 0; k < rsv_schp->k_use_sg; k++) {
2189 if (rem <= num) { 1894 if (rem <= num) {
2190 sfp->save_scat_len = num;
2191 sg->length = rem;
2192 req_schp->k_use_sg = k + 1; 1895 req_schp->k_use_sg = k + 1;
2193 req_schp->sglist_len = rsv_schp->sglist_len; 1896 req_schp->sglist_len = rsv_schp->sglist_len;
2194 req_schp->buffer = rsv_schp->buffer; 1897 req_schp->pages = rsv_schp->pages;
2195 1898
2196 req_schp->bufflen = size; 1899 req_schp->bufflen = size;
2197 req_schp->b_malloc_len = rsv_schp->b_malloc_len; 1900 req_schp->page_order = rsv_schp->page_order;
2198 break; 1901 break;
2199 } else 1902 } else
2200 rem -= num; 1903 rem -= num;
@@ -2208,22 +1911,13 @@ static void
2208sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) 1911sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
2209{ 1912{
2210 Sg_scatter_hold *req_schp = &srp->data; 1913 Sg_scatter_hold *req_schp = &srp->data;
2211 Sg_scatter_hold *rsv_schp = &sfp->reserve;
2212 1914
2213 SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n", 1915 SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n",
2214 (int) req_schp->k_use_sg)); 1916 (int) req_schp->k_use_sg));
2215 if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) {
2216 struct scatterlist *sg = rsv_schp->buffer;
2217
2218 if (sfp->save_scat_len > 0)
2219 (sg + (req_schp->k_use_sg - 1))->length =
2220 (unsigned) sfp->save_scat_len;
2221 else
2222 SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n"));
2223 }
2224 req_schp->k_use_sg = 0; 1917 req_schp->k_use_sg = 0;
2225 req_schp->bufflen = 0; 1918 req_schp->bufflen = 0;
2226 req_schp->buffer = NULL; 1919 req_schp->pages = NULL;
1920 req_schp->page_order = 0;
2227 req_schp->sglist_len = 0; 1921 req_schp->sglist_len = 0;
2228 sfp->save_scat_len = 0; 1922 sfp->save_scat_len = 0;
2229 srp->res_used = 0; 1923 srp->res_used = 0;
@@ -2481,53 +2175,6 @@ sg_res_in_use(Sg_fd * sfp)
2481 return srp ? 1 : 0; 2175 return srp ? 1 : 0;
2482} 2176}
2483 2177
2484/* The size fetched (value output via retSzp) set when non-NULL return */
2485static struct page *
2486sg_page_malloc(int rqSz, int lowDma, int *retSzp)
2487{
2488 struct page *resp = NULL;
2489 gfp_t page_mask;
2490 int order, a_size;
2491 int resSz;
2492
2493 if ((rqSz <= 0) || (NULL == retSzp))
2494 return resp;
2495
2496 if (lowDma)
2497 page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN;
2498 else
2499 page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
2500
2501 for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
2502 order++, a_size <<= 1) ;
2503 resSz = a_size; /* rounded up if necessary */
2504 resp = alloc_pages(page_mask, order);
2505 while ((!resp) && order) {
2506 --order;
2507 a_size >>= 1; /* divide by 2, until PAGE_SIZE */
2508 resp = alloc_pages(page_mask, order); /* try half */
2509 resSz = a_size;
2510 }
2511 if (resp) {
2512 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
2513 memset(page_address(resp), 0, resSz);
2514 *retSzp = resSz;
2515 }
2516 return resp;
2517}
2518
2519static void
2520sg_page_free(struct page *page, int size)
2521{
2522 int order, a_size;
2523
2524 if (!page)
2525 return;
2526 for (order = 0, a_size = PAGE_SIZE; a_size < size;
2527 order++, a_size <<= 1) ;
2528 __free_pages(page, order);
2529}
2530
2531#ifdef CONFIG_SCSI_PROC_FS 2178#ifdef CONFIG_SCSI_PROC_FS
2532static int 2179static int
2533sg_idr_max_id(int id, void *p, void *data) 2180sg_idr_max_id(int id, void *p, void *data)
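
Taken together, the sg.c changes above retire the driver's private scatter-gather plumbing (sg_build_direct(), sg_write_xfer(), sg_read_xfer(), sg_page_malloc()) in favour of stock block-layer services: blk_get_request() plus blk_rq_map_user()/blk_rq_map_user_iov() build the transfer, blk_execute_rq_nowait() issues it, and a request end_io callback replaces the old scsi_execute_async() done hook. A minimal sketch of that pattern, assuming the 2.6.28-era APIs visible in the diff; my_cmd, my_submit() and my_end_io() are hypothetical names and error handling is pared to the bone:

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/blkdev.h>

/* Hypothetical per-command context, mirroring srp->rq / srp->bio. */
struct my_cmd {
	struct request *rq;
	struct bio *bio;
};

static void my_end_io(struct request *rq, int uptodate)
{
	struct my_cmd *cmd = rq->end_io_data;

	/* Status and residual count now live on the request itself. */
	pr_debug("errors=%d resid=%u\n", rq->errors, rq->data_len);
	blk_rq_unmap_user(cmd->bio);
	blk_put_request(rq);
}

static int my_submit(struct request_queue *q, struct gendisk *disk,
		     struct my_cmd *cmd, unsigned char *cdb,
		     unsigned short cdb_len, void __user *ubuf,
		     unsigned int len, int rw)
{
	struct request *rq = blk_get_request(q, rw, GFP_KERNEL);
	int err;

	if (!rq)
		return -ENOMEM;

	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	memcpy(rq->cmd, cdb, cdb_len);
	rq->cmd_len = cdb_len;
	rq->end_io_data = cmd;

	/* A NULL rq_map_data lets the block layer allocate (and
	 * bounce) the pages itself, instead of handing it a reserve
	 * buffer the way sg_start_req() does via map_data. */
	err = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (err) {
		blk_put_request(rq);
		return err;
	}
	cmd->rq = rq;
	cmd->bio = rq->bio;

	blk_execute_rq_nowait(q, disk, rq, 0, my_end_io);
	return 0;
}
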
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 27f5bfd1def3..0f17009c99d2 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -331,7 +331,7 @@ static int sr_done(struct scsi_cmnd *SCpnt)
331 331
332static int sr_prep_fn(struct request_queue *q, struct request *rq) 332static int sr_prep_fn(struct request_queue *q, struct request *rq)
333{ 333{
334 int block=0, this_count, s_size, timeout = SR_TIMEOUT; 334 int block = 0, this_count, s_size;
335 struct scsi_cd *cd; 335 struct scsi_cd *cd;
336 struct scsi_cmnd *SCpnt; 336 struct scsi_cmnd *SCpnt;
337 struct scsi_device *sdp = q->queuedata; 337 struct scsi_device *sdp = q->queuedata;
@@ -461,7 +461,6 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
461 SCpnt->transfersize = cd->device->sector_size; 461 SCpnt->transfersize = cd->device->sector_size;
462 SCpnt->underflow = this_count << 9; 462 SCpnt->underflow = this_count << 9;
463 SCpnt->allowed = MAX_RETRIES; 463 SCpnt->allowed = MAX_RETRIES;
464 SCpnt->timeout_per_command = timeout;
465 464
466 /* 465 /*
467 * This indicates that the command is ready from our end to be 466 * This indicates that the command is ready from our end to be
@@ -620,6 +619,8 @@ static int sr_probe(struct device *dev)
620 disk->fops = &sr_bdops; 619 disk->fops = &sr_bdops;
621 disk->flags = GENHD_FL_CD; 620 disk->flags = GENHD_FL_CD;
622 621
622 blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
623
623 cd->device = sdev; 624 cd->device = sdev;
624 cd->disk = disk; 625 cd->disk = disk;
625 cd->driver = &sr_template; 626 cd->driver = &sr_template;
@@ -878,7 +879,7 @@ static void sr_kref_release(struct kref *kref)
878 struct gendisk *disk = cd->disk; 879 struct gendisk *disk = cd->disk;
879 880
880 spin_lock(&sr_index_lock); 881 spin_lock(&sr_index_lock);
881 clear_bit(disk->first_minor, sr_index_bits); 882 clear_bit(MINOR(disk_devt(disk)), sr_index_bits);
882 spin_unlock(&sr_index_lock); 883 spin_unlock(&sr_index_lock);
883 884
884 unregister_cdrom(&cd->cdi); 885 unregister_cdrom(&cd->cdi);
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index d39107b7669b..f4e6cde1fd0d 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -519,8 +519,8 @@ static int sym53c8xx_queue_command(struct scsi_cmnd *cmd,
519 * Shorten our settle_time if needed for 519 * Shorten our settle_time if needed for
520 * this command not to time out. 520 * this command not to time out.
521 */ 521 */
522 if (np->s.settle_time_valid && cmd->timeout_per_command) { 522 if (np->s.settle_time_valid && cmd->request->timeout) {
523 unsigned long tlimit = jiffies + cmd->timeout_per_command; 523 unsigned long tlimit = jiffies + cmd->request->timeout;
524 tlimit -= SYM_CONF_TIMER_INTERVAL*2; 524 tlimit -= SYM_CONF_TIMER_INTERVAL*2;
525 if (time_after(np->s.settle_time, tlimit)) { 525 if (time_after(np->s.settle_time, tlimit)) {
526 np->s.settle_time = tlimit; 526 np->s.settle_time = tlimit;
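
The sr.c and sym53c8xx hunks above reflect the same block-layer migration on the timeout side: the per-command timeout_per_command field is gone, the driver installs one default timeout on the queue at probe time, and consumers read it back from cmd->request->timeout. A sketch of the producer half, under the same assumed 2.6.28 API; my_probe_queue() is a hypothetical helper:

#include <linux/blkdev.h>

/* One default timeout per queue replaces stamping
 * timeout_per_command onto every command. */
static void my_probe_queue(struct request_queue *q)
{
	blk_queue_rq_timeout(q, 30 * HZ);	/* analogous to SR_TIMEOUT */
}
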
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index c3e174b35fe6..19caf7c962ac 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
107 BUG_ON(bip == NULL); 107 BUG_ON(bip == NULL);
108 108
109 /* A cloned bio doesn't own the integrity metadata */ 109 /* A cloned bio doesn't own the integrity metadata */
110 if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) 110 if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY)
111 && bip->bip_buf != NULL)
111 kfree(bip->bip_buf); 112 kfree(bip->bip_buf);
112 113
113 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); 114 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
@@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
150} 151}
151EXPORT_SYMBOL(bio_integrity_add_page); 152EXPORT_SYMBOL(bio_integrity_add_page);
152 153
154static int bdev_integrity_enabled(struct block_device *bdev, int rw)
155{
156 struct blk_integrity *bi = bdev_get_integrity(bdev);
157
158 if (bi == NULL)
159 return 0;
160
161 if (rw == READ && bi->verify_fn != NULL &&
162 (bi->flags & INTEGRITY_FLAG_READ))
163 return 1;
164
165 if (rw == WRITE && bi->generate_fn != NULL &&
166 (bi->flags & INTEGRITY_FLAG_WRITE))
167 return 1;
168
169 return 0;
170}
171
153/** 172/**
154 * bio_integrity_enabled - Check whether integrity can be passed 173 * bio_integrity_enabled - Check whether integrity can be passed
155 * @bio: bio to check 174 * @bio: bio to check
@@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio)
313 } 332 }
314} 333}
315 334
335static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
336{
337 if (bi)
338 return bi->tuple_size;
339
340 return 0;
341}
342
316/** 343/**
317 * bio_integrity_prep - Prepare bio for integrity I/O 344 * bio_integrity_prep - Prepare bio for integrity I/O
318 * @bio: bio to prepare 345 * @bio: bio to prepare
diff --git a/fs/bio.c b/fs/bio.c
index 3cba7ae34d75..77a55bcceedb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -30,7 +30,7 @@
30 30
31static struct kmem_cache *bio_slab __read_mostly; 31static struct kmem_cache *bio_slab __read_mostly;
32 32
33mempool_t *bio_split_pool __read_mostly; 33static mempool_t *bio_split_pool __read_mostly;
34 34
35/* 35/*
36 * if you change this list, also change bvec_alloc or things will 36 * if you change this list, also change bvec_alloc or things will
@@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
60 struct bio_vec *bvl; 60 struct bio_vec *bvl;
61 61
62 /* 62 /*
63 * see comment near bvec_array define! 63 * If 'bs' is given, lookup the pool and do the mempool alloc.
64 * If not, this is a bio_kmalloc() allocation and just do a
65 * kzalloc() for the exact number of vecs right away.
64 */ 66 */
65 switch (nr) { 67 if (bs) {
66 case 1 : *idx = 0; break; 68 /*
67 case 2 ... 4: *idx = 1; break; 69 * see comment near bvec_array define!
68 case 5 ... 16: *idx = 2; break; 70 */
69 case 17 ... 64: *idx = 3; break; 71 switch (nr) {
70 case 65 ... 128: *idx = 4; break; 72 case 1:
71 case 129 ... BIO_MAX_PAGES: *idx = 5; break; 73 *idx = 0;
74 break;
75 case 2 ... 4:
76 *idx = 1;
77 break;
78 case 5 ... 16:
79 *idx = 2;
80 break;
81 case 17 ... 64:
82 *idx = 3;
83 break;
84 case 65 ... 128:
85 *idx = 4;
86 break;
87 case 129 ... BIO_MAX_PAGES:
88 *idx = 5;
89 break;
72 default: 90 default:
73 return NULL; 91 return NULL;
74 } 92 }
75 /*
76 * idx now points to the pool we want to allocate from
77 */
78 93
79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 94 /*
80 if (bvl) 95 * idx now points to the pool we want to allocate from
81 memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 96 */
97 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
98 if (bvl)
99 memset(bvl, 0,
100 bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
101 } else
102 bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask);
82 103
83 return bvl; 104 return bvl;
84} 105}
@@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio)
107 bio_free(bio, fs_bio_set); 128 bio_free(bio, fs_bio_set);
108} 129}
109 130
131static void bio_kmalloc_destructor(struct bio *bio)
132{
133 kfree(bio->bi_io_vec);
134 kfree(bio);
135}
136
110void bio_init(struct bio *bio) 137void bio_init(struct bio *bio)
111{ 138{
112 memset(bio, 0, sizeof(*bio)); 139 memset(bio, 0, sizeof(*bio));
113 bio->bi_flags = 1 << BIO_UPTODATE; 140 bio->bi_flags = 1 << BIO_UPTODATE;
141 bio->bi_comp_cpu = -1;
114 atomic_set(&bio->bi_cnt, 1); 142 atomic_set(&bio->bi_cnt, 1);
115} 143}
116 144
@@ -118,19 +146,25 @@ void bio_init(struct bio *bio)
118 * bio_alloc_bioset - allocate a bio for I/O 146 * bio_alloc_bioset - allocate a bio for I/O
119 * @gfp_mask: the GFP_ mask given to the slab allocator 147 * @gfp_mask: the GFP_ mask given to the slab allocator
120 * @nr_iovecs: number of iovecs to pre-allocate 148 * @nr_iovecs: number of iovecs to pre-allocate
121 * @bs: the bio_set to allocate from 149 * @bs: the bio_set to allocate from. If %NULL, just use kmalloc
122 * 150 *
123 * Description: 151 * Description:
124 * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. 152 * bio_alloc_bioset will first try its own mempool to satisfy the allocation.
125 * If %__GFP_WAIT is set then we will block on the internal pool waiting 153 * If %__GFP_WAIT is set then we will block on the internal pool waiting
126 * for a &struct bio to become free. 154 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
155 * fall back to just using @kmalloc to allocate the required memory.
127 * 156 *
128 * allocate bio and iovecs from the memory pools specified by the 157 * allocate bio and iovecs from the memory pools specified by the
129 * bio_set structure. 158 * bio_set structure, or @kmalloc if none given.
130 **/ 159 **/
131struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 160struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
132{ 161{
133 struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); 162 struct bio *bio;
163
164 if (bs)
165 bio = mempool_alloc(bs->bio_pool, gfp_mask);
166 else
167 bio = kmalloc(sizeof(*bio), gfp_mask);
134 168
135 if (likely(bio)) { 169 if (likely(bio)) {
136 struct bio_vec *bvl = NULL; 170 struct bio_vec *bvl = NULL;
@@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
141 175
142 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 176 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
143 if (unlikely(!bvl)) { 177 if (unlikely(!bvl)) {
144 mempool_free(bio, bs->bio_pool); 178 if (bs)
179 mempool_free(bio, bs->bio_pool);
180 else
181 kfree(bio);
145 bio = NULL; 182 bio = NULL;
146 goto out; 183 goto out;
147 } 184 }
@@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs)
164 return bio; 201 return bio;
165} 202}
166 203
204/*
205 * Like bio_alloc(), but doesn't use a mempool backing. This means that
206 * it CAN fail, but while bio_alloc() can only be used for allocations
207 * that have a short (finite) life span, bio_kmalloc() should be used
 208 * for more permanent bio allocations (like allocating some bios for
 209 * initialization or setup purposes).
210 */
211struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
212{
213 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
214
215 if (bio)
216 bio->bi_destructor = bio_kmalloc_destructor;
217
218 return bio;
219}
220
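
The bio_kmalloc() helper added above trades the mempool guarantee for a plain kmalloc(), so it can fail and callers must check for NULL; because bi_destructor is pre-set to bio_kmalloc_destructor, an ordinary bio_put() frees both the bio and its biovec array. A hedged usage sketch, with my_setup_bio() as a hypothetical caller:

#include <linux/bio.h>

/* Hypothetical caller of the new bio_kmalloc() API. */
static struct bio *my_setup_bio(int nr_vecs)
{
	struct bio *bio = bio_kmalloc(GFP_KERNEL, nr_vecs);

	if (!bio)
		return NULL;	/* unlike bio_alloc(), this may fail */

	/* Long-lived setup use is fine here; on the final bio_put(),
	 * bio_kmalloc_destructor() kfree()s the io_vec and the bio. */
	return bio;
}
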
167void zero_fill_bio(struct bio *bio) 221void zero_fill_bio(struct bio *bio)
168{ 222{
169 unsigned long flags; 223 unsigned long flags;
@@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
208 return bio->bi_phys_segments; 262 return bio->bi_phys_segments;
209} 263}
210 264
211inline int bio_hw_segments(struct request_queue *q, struct bio *bio)
212{
213 if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
214 blk_recount_segments(q, bio);
215
216 return bio->bi_hw_segments;
217}
218
219/** 265/**
220 * __bio_clone - clone a bio 266 * __bio_clone - clone a bio
221 * @bio: destination bio 267 * @bio: destination bio
@@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
350 */ 396 */
351 397
352 while (bio->bi_phys_segments >= q->max_phys_segments 398 while (bio->bi_phys_segments >= q->max_phys_segments
353 || bio->bi_hw_segments >= q->max_hw_segments 399 || bio->bi_phys_segments >= q->max_hw_segments) {
354 || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) {
355 400
356 if (retried_segments) 401 if (retried_segments)
357 return 0; 402 return 0;
@@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
395 } 440 }
396 441
397 /* If we may be able to merge these biovecs, force a recount */ 442 /* If we may be able to merge these biovecs, force a recount */
398 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || 443 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
399 BIOVEC_VIRT_MERGEABLE(bvec-1, bvec)))
400 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 444 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
401 445
402 bio->bi_vcnt++; 446 bio->bi_vcnt++;
403 bio->bi_phys_segments++; 447 bio->bi_phys_segments++;
404 bio->bi_hw_segments++;
405 done: 448 done:
406 bio->bi_size += len; 449 bio->bi_size += len;
407 return len; 450 return len;
@@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
449 492
450struct bio_map_data { 493struct bio_map_data {
451 struct bio_vec *iovecs; 494 struct bio_vec *iovecs;
452 int nr_sgvecs;
453 struct sg_iovec *sgvecs; 495 struct sg_iovec *sgvecs;
496 int nr_sgvecs;
497 int is_our_pages;
454}; 498};
455 499
456static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, 500static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
457 struct sg_iovec *iov, int iov_count) 501 struct sg_iovec *iov, int iov_count,
502 int is_our_pages)
458{ 503{
459 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); 504 memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
460 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); 505 memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
461 bmd->nr_sgvecs = iov_count; 506 bmd->nr_sgvecs = iov_count;
507 bmd->is_our_pages = is_our_pages;
462 bio->bi_private = bmd; 508 bio->bi_private = bmd;
463} 509}
464 510
@@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
493} 539}
494 540
495static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, 541static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
496 struct sg_iovec *iov, int iov_count, int uncopy) 542 struct sg_iovec *iov, int iov_count, int uncopy,
543 int do_free_page)
497{ 544{
498 int ret = 0, i; 545 int ret = 0, i;
499 struct bio_vec *bvec; 546 struct bio_vec *bvec;
@@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
536 } 583 }
537 } 584 }
538 585
539 if (uncopy) 586 if (do_free_page)
540 __free_page(bvec->bv_page); 587 __free_page(bvec->bv_page);
541 } 588 }
542 589
@@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
553int bio_uncopy_user(struct bio *bio) 600int bio_uncopy_user(struct bio *bio)
554{ 601{
555 struct bio_map_data *bmd = bio->bi_private; 602 struct bio_map_data *bmd = bio->bi_private;
556 int ret; 603 int ret = 0;
557
558 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1);
559 604
605 if (!bio_flagged(bio, BIO_NULL_MAPPED))
606 ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
607 bmd->nr_sgvecs, 1, bmd->is_our_pages);
560 bio_free_map_data(bmd); 608 bio_free_map_data(bmd);
561 bio_put(bio); 609 bio_put(bio);
562 return ret; 610 return ret;
@@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio)
565/** 613/**
566 * bio_copy_user_iov - copy user data to bio 614 * bio_copy_user_iov - copy user data to bio
567 * @q: destination block queue 615 * @q: destination block queue
616 * @map_data: pointer to the rq_map_data holding pages (if necessary)
568 * @iov: the iovec. 617 * @iov: the iovec.
569 * @iov_count: number of elements in the iovec 618 * @iov_count: number of elements in the iovec
570 * @write_to_vm: bool indicating writing to pages or not 619 * @write_to_vm: bool indicating writing to pages or not
620 * @gfp_mask: memory allocation flags
571 * 621 *
572 * Prepares and returns a bio for indirect user io, bouncing data 622 * Prepares and returns a bio for indirect user io, bouncing data
573 * to/from kernel pages as necessary. Must be paired with 623 * to/from kernel pages as necessary. Must be paired with
574 * call bio_uncopy_user() on io completion. 624 * call bio_uncopy_user() on io completion.
575 */ 625 */
576struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, 626struct bio *bio_copy_user_iov(struct request_queue *q,
577 int iov_count, int write_to_vm) 627 struct rq_map_data *map_data,
628 struct sg_iovec *iov, int iov_count,
629 int write_to_vm, gfp_t gfp_mask)
578{ 630{
579 struct bio_map_data *bmd; 631 struct bio_map_data *bmd;
580 struct bio_vec *bvec; 632 struct bio_vec *bvec;
@@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
597 len += iov[i].iov_len; 649 len += iov[i].iov_len;
598 } 650 }
599 651
600 bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); 652 bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
601 if (!bmd) 653 if (!bmd)
602 return ERR_PTR(-ENOMEM); 654 return ERR_PTR(-ENOMEM);
603 655
604 ret = -ENOMEM; 656 ret = -ENOMEM;
605 bio = bio_alloc(GFP_KERNEL, nr_pages); 657 bio = bio_alloc(gfp_mask, nr_pages);
606 if (!bio) 658 if (!bio)
607 goto out_bmd; 659 goto out_bmd;
608 660
609 bio->bi_rw |= (!write_to_vm << BIO_RW); 661 bio->bi_rw |= (!write_to_vm << BIO_RW);
610 662
611 ret = 0; 663 ret = 0;
664 i = 0;
612 while (len) { 665 while (len) {
613 unsigned int bytes = PAGE_SIZE; 666 unsigned int bytes;
667
668 if (map_data)
669 bytes = 1U << (PAGE_SHIFT + map_data->page_order);
670 else
671 bytes = PAGE_SIZE;
614 672
615 if (bytes > len) 673 if (bytes > len)
616 bytes = len; 674 bytes = len;
617 675
618 page = alloc_page(q->bounce_gfp | GFP_KERNEL); 676 if (map_data) {
677 if (i == map_data->nr_entries) {
678 ret = -ENOMEM;
679 break;
680 }
681 page = map_data->pages[i++];
682 } else
683 page = alloc_page(q->bounce_gfp | gfp_mask);
619 if (!page) { 684 if (!page) {
620 ret = -ENOMEM; 685 ret = -ENOMEM;
621 break; 686 break;
@@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov,
634 * success 699 * success
635 */ 700 */
636 if (!write_to_vm) { 701 if (!write_to_vm) {
637 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); 702 ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0);
638 if (ret) 703 if (ret)
639 goto cleanup; 704 goto cleanup;
640 } 705 }
641 706
642 bio_set_map_data(bmd, bio, iov, iov_count); 707 bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1);
643 return bio; 708 return bio;
644cleanup: 709cleanup:
645 bio_for_each_segment(bvec, bio, i) 710 if (!map_data)
646 __free_page(bvec->bv_page); 711 bio_for_each_segment(bvec, bio, i)
712 __free_page(bvec->bv_page);
647 713
648 bio_put(bio); 714 bio_put(bio);
649out_bmd: 715out_bmd:
@@ -654,29 +720,32 @@ out_bmd:
654/** 720/**
655 * bio_copy_user - copy user data to bio 721 * bio_copy_user - copy user data to bio
656 * @q: destination block queue 722 * @q: destination block queue
723 * @map_data: pointer to the rq_map_data holding pages (if necessary)
657 * @uaddr: start of user address 724 * @uaddr: start of user address
658 * @len: length in bytes 725 * @len: length in bytes
659 * @write_to_vm: bool indicating writing to pages or not 726 * @write_to_vm: bool indicating writing to pages or not
727 * @gfp_mask: memory allocation flags
660 * 728 *
661 * Prepares and returns a bio for indirect user io, bouncing data 729 * Prepares and returns a bio for indirect user io, bouncing data
662 * to/from kernel pages as necessary. Must be paired with 730 * to/from kernel pages as necessary. Must be paired with
663 * call bio_uncopy_user() on io completion. 731 * call bio_uncopy_user() on io completion.
664 */ 732 */
665struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, 733struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data,
666 unsigned int len, int write_to_vm) 734 unsigned long uaddr, unsigned int len,
735 int write_to_vm, gfp_t gfp_mask)
667{ 736{
668 struct sg_iovec iov; 737 struct sg_iovec iov;
669 738
670 iov.iov_base = (void __user *)uaddr; 739 iov.iov_base = (void __user *)uaddr;
671 iov.iov_len = len; 740 iov.iov_len = len;
672 741
673 return bio_copy_user_iov(q, &iov, 1, write_to_vm); 742 return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
674} 743}
675 744
676static struct bio *__bio_map_user_iov(struct request_queue *q, 745static struct bio *__bio_map_user_iov(struct request_queue *q,
677 struct block_device *bdev, 746 struct block_device *bdev,
678 struct sg_iovec *iov, int iov_count, 747 struct sg_iovec *iov, int iov_count,
679 int write_to_vm) 748 int write_to_vm, gfp_t gfp_mask)
680{ 749{
681 int i, j; 750 int i, j;
682 int nr_pages = 0; 751 int nr_pages = 0;
@@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
702 if (!nr_pages) 771 if (!nr_pages)
703 return ERR_PTR(-EINVAL); 772 return ERR_PTR(-EINVAL);
704 773
705 bio = bio_alloc(GFP_KERNEL, nr_pages); 774 bio = bio_alloc(gfp_mask, nr_pages);
706 if (!bio) 775 if (!bio)
707 return ERR_PTR(-ENOMEM); 776 return ERR_PTR(-ENOMEM);
708 777
709 ret = -ENOMEM; 778 ret = -ENOMEM;
710 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); 779 pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask);
711 if (!pages) 780 if (!pages)
712 goto out; 781 goto out;
713 782
@@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
786 * @uaddr: start of user address 855 * @uaddr: start of user address
787 * @len: length in bytes 856 * @len: length in bytes
788 * @write_to_vm: bool indicating writing to pages or not 857 * @write_to_vm: bool indicating writing to pages or not
858 * @gfp_mask: memory allocation flags
789 * 859 *
790 * Map the user space address into a bio suitable for io to a block 860 * Map the user space address into a bio suitable for io to a block
791 * device. Returns an error pointer in case of error. 861 * device. Returns an error pointer in case of error.
792 */ 862 */
793struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, 863struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
794 unsigned long uaddr, unsigned int len, int write_to_vm) 864 unsigned long uaddr, unsigned int len, int write_to_vm,
865 gfp_t gfp_mask)
795{ 866{
796 struct sg_iovec iov; 867 struct sg_iovec iov;
797 868
798 iov.iov_base = (void __user *)uaddr; 869 iov.iov_base = (void __user *)uaddr;
799 iov.iov_len = len; 870 iov.iov_len = len;
800 871
801 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); 872 return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
802} 873}
803 874
804/** 875/**
@@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev,
808 * @iov: the iovec. 879 * @iov: the iovec.
809 * @iov_count: number of elements in the iovec 880 * @iov_count: number of elements in the iovec
810 * @write_to_vm: bool indicating writing to pages or not 881 * @write_to_vm: bool indicating writing to pages or not
882 * @gfp_mask: memory allocation flags
811 * 883 *
812 * Map the user space address into a bio suitable for io to a block 884 * Map the user space address into a bio suitable for io to a block
813 * device. Returns an error pointer in case of error. 885 * device. Returns an error pointer in case of error.
814 */ 886 */
815struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, 887struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
816 struct sg_iovec *iov, int iov_count, 888 struct sg_iovec *iov, int iov_count,
817 int write_to_vm) 889 int write_to_vm, gfp_t gfp_mask)
818{ 890{
819 struct bio *bio; 891 struct bio *bio;
820 892
821 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); 893 bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm,
822 894 gfp_mask);
823 if (IS_ERR(bio)) 895 if (IS_ERR(bio))
824 return bio; 896 return bio;
825 897
@@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
976struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, 1048struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
977 gfp_t gfp_mask, int reading) 1049 gfp_t gfp_mask, int reading)
978{ 1050{
979 unsigned long kaddr = (unsigned long)data;
980 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
981 unsigned long start = kaddr >> PAGE_SHIFT;
982 const int nr_pages = end - start;
983 struct bio *bio; 1051 struct bio *bio;
984 struct bio_vec *bvec; 1052 struct bio_vec *bvec;
985 struct bio_map_data *bmd; 1053 int i;
986 int i, ret;
987 struct sg_iovec iov;
988
989 iov.iov_base = data;
990 iov.iov_len = len;
991
992 bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask);
993 if (!bmd)
994 return ERR_PTR(-ENOMEM);
995
996 ret = -ENOMEM;
997 bio = bio_alloc(gfp_mask, nr_pages);
998 if (!bio)
999 goto out_bmd;
1000
1001 while (len) {
1002 struct page *page;
1003 unsigned int bytes = PAGE_SIZE;
1004
1005 if (bytes > len)
1006 bytes = len;
1007
1008 page = alloc_page(q->bounce_gfp | gfp_mask);
1009 if (!page) {
1010 ret = -ENOMEM;
1011 goto cleanup;
1012 }
1013
1014 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
1015 ret = -EINVAL;
1016 goto cleanup;
1017 }
1018 1054
1019 len -= bytes; 1055 bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask);
1020 } 1056 if (IS_ERR(bio))
1057 return bio;
1021 1058
1022 if (!reading) { 1059 if (!reading) {
1023 void *p = data; 1060 void *p = data;
@@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
1030 } 1067 }
1031 } 1068 }
1032 1069
1033 bio->bi_private = bmd;
1034 bio->bi_end_io = bio_copy_kern_endio; 1070 bio->bi_end_io = bio_copy_kern_endio;
1035 1071
1036 bio_set_map_data(bmd, bio, &iov, 1);
1037 return bio; 1072 return bio;
1038cleanup:
1039 bio_for_each_segment(bvec, bio, i)
1040 __free_page(bvec->bv_page);
1041
1042 bio_put(bio);
1043out_bmd:
1044 bio_free_map_data(bmd);
1045
1046 return ERR_PTR(ret);
1047} 1073}
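
After this rework bio_copy_kern() is a thin wrapper: bio_copy_user() builds and populates the bio (map_data is NULL, so pages come from alloc_page()), and the kernel buffer is copied in by hand for writes. A hedged usage sketch with hypothetical names:

	/* stage a kernel buffer for a transfer; reading == 1 means device to buffer */
	struct bio *bio = bio_copy_kern(q, kbuf, kbuf_len, GFP_KERNEL, 1);
	if (IS_ERR(bio))
		return PTR_ERR(bio);
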
1048 1074
1049/* 1075/*
@@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err)
1230 * split a bio - only worry about a bio with a single page 1256 * split a bio - only worry about a bio with a single page
1231 * in its iovec 1257 * in its iovec
1232 */ 1258 */
1233struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) 1259struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1234{ 1260{
1235 struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); 1261 struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
1236 1262
1237 if (!bp) 1263 if (!bp)
1238 return bp; 1264 return bp;
@@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1266 bp->bio2.bi_end_io = bio_pair_end_2; 1292 bp->bio2.bi_end_io = bio_pair_end_2;
1267 1293
1268 bp->bio1.bi_private = bi; 1294 bp->bio1.bi_private = bi;
1269 bp->bio2.bi_private = pool; 1295 bp->bio2.bi_private = bio_split_pool;
1270 1296
1271 if (bio_integrity(bi)) 1297 if (bio_integrity(bi))
1272 bio_integrity_split(bi, bp, first_sectors); 1298 bio_integrity_split(bi, bp, first_sectors);
@@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1274 return bp; 1300 return bp;
1275} 1301}
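
With the pool argument gone, callers now split against the global bio_split_pool implicitly. A hedged sketch in the style of a striped-array caller, where chunk_sects and the surrounding context are hypothetical:

	/* split so bp->bio1 ends exactly on the chunk boundary */
	struct bio_pair *bp = bio_split(bio,
			chunk_sects - (bio->bi_sector & (chunk_sects - 1)));
	if (bp) {
		generic_make_request(&bp->bio1);
		generic_make_request(&bp->bio2);
		bio_pair_release(bp);
	}
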
1276 1302
1303/**
1304 * bio_sector_offset - Find hardware sector offset in bio
1305 * @bio: bio to inspect
1306 * @index: bio_vec index
1307 * @offset: offset in bv_page
1308 *
 1309 * Return the number of hardware sectors between the beginning of the bio
1310 * and an end point indicated by a bio_vec index and an offset
1311 * within that vector's page.
1312 */
1313sector_t bio_sector_offset(struct bio *bio, unsigned short index,
1314 unsigned int offset)
1315{
1316 unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue);
1317 struct bio_vec *bv;
1318 sector_t sectors;
1319 int i;
1320
1321 sectors = 0;
1322
1323 if (index >= bio->bi_idx)
1324 index = bio->bi_vcnt - 1;
1325
1326 __bio_for_each_segment(bv, bio, i, 0) {
1327 if (i == index) {
1328 if (offset > bv->bv_offset)
1329 sectors += (offset - bv->bv_offset) / sector_sz;
1330 break;
1331 }
1332
1333 sectors += bv->bv_len / sector_sz;
1334 }
1335
1336 return sectors;
1337}
1338EXPORT_SYMBOL(bio_sector_offset);
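
A hedged sketch of the question this helper answers: given a point inside a bio (a bio_vec index plus a byte offset into that vec's page), how many hardware sectors precede it. The names are hypothetical:

	/* how many hw sectors of 'bio' lie before byte 'off' of segment 'idx'? */
	sector_t done = bio_sector_offset(bio, idx, off);
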
1277 1339
1278/* 1340/*
1279 * create memory pools for biovec's in a bio_set. 1341 * create memory pools for biovec's in a bio_set.
@@ -1376,6 +1438,7 @@ static int __init init_bio(void)
1376subsys_initcall(init_bio); 1438subsys_initcall(init_bio);
1377 1439
1378EXPORT_SYMBOL(bio_alloc); 1440EXPORT_SYMBOL(bio_alloc);
1441EXPORT_SYMBOL(bio_kmalloc);
1379EXPORT_SYMBOL(bio_put); 1442EXPORT_SYMBOL(bio_put);
1380EXPORT_SYMBOL(bio_free); 1443EXPORT_SYMBOL(bio_free);
1381EXPORT_SYMBOL(bio_endio); 1444EXPORT_SYMBOL(bio_endio);
@@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init);
1383EXPORT_SYMBOL(__bio_clone); 1446EXPORT_SYMBOL(__bio_clone);
1384EXPORT_SYMBOL(bio_clone); 1447EXPORT_SYMBOL(bio_clone);
1385EXPORT_SYMBOL(bio_phys_segments); 1448EXPORT_SYMBOL(bio_phys_segments);
1386EXPORT_SYMBOL(bio_hw_segments);
1387EXPORT_SYMBOL(bio_add_page); 1449EXPORT_SYMBOL(bio_add_page);
1388EXPORT_SYMBOL(bio_add_pc_page); 1450EXPORT_SYMBOL(bio_add_pc_page);
1389EXPORT_SYMBOL(bio_get_nr_vecs); 1451EXPORT_SYMBOL(bio_get_nr_vecs);
@@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern);
1393EXPORT_SYMBOL(bio_copy_kern); 1455EXPORT_SYMBOL(bio_copy_kern);
1394EXPORT_SYMBOL(bio_pair_release); 1456EXPORT_SYMBOL(bio_pair_release);
1395EXPORT_SYMBOL(bio_split); 1457EXPORT_SYMBOL(bio_split);
1396EXPORT_SYMBOL(bio_split_pool);
1397EXPORT_SYMBOL(bio_copy_user); 1458EXPORT_SYMBOL(bio_copy_user);
1398EXPORT_SYMBOL(bio_uncopy_user); 1459EXPORT_SYMBOL(bio_uncopy_user);
1399EXPORT_SYMBOL(bioset_create); 1460EXPORT_SYMBOL(bioset_create);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aff54219e049..d84f0469a016 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release);
540 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 540 * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
541 */ 541 */
542 542
543static struct kobject *bdev_get_kobj(struct block_device *bdev)
544{
545 if (bdev->bd_contains != bdev)
546 return kobject_get(&bdev->bd_part->dev.kobj);
547 else
548 return kobject_get(&bdev->bd_disk->dev.kobj);
549}
550
551static struct kobject *bdev_get_holder(struct block_device *bdev)
552{
553 if (bdev->bd_contains != bdev)
554 return kobject_get(bdev->bd_part->holder_dir);
555 else
556 return kobject_get(bdev->bd_disk->holder_dir);
557}
558
559static int add_symlink(struct kobject *from, struct kobject *to) 543static int add_symlink(struct kobject *from, struct kobject *to)
560{ 544{
561 if (!from || !to) 545 if (!from || !to)
@@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev,
604 if (!bo->hdev) 588 if (!bo->hdev)
605 goto fail_put_sdir; 589 goto fail_put_sdir;
606 590
607 bo->sdev = bdev_get_kobj(bdev); 591 bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
608 if (!bo->sdev) 592 if (!bo->sdev)
609 goto fail_put_hdev; 593 goto fail_put_hdev;
610 594
611 bo->hdir = bdev_get_holder(bdev); 595 bo->hdir = kobject_get(bdev->bd_part->holder_dir);
612 if (!bo->hdir) 596 if (!bo->hdir)
613 goto fail_put_sdev; 597 goto fail_put_sdev;
614 598
@@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode)
868 852
869EXPORT_SYMBOL(open_by_devnum); 853EXPORT_SYMBOL(open_by_devnum);
870 854
855/**
856 * flush_disk - invalidates all buffer-cache entries on a disk
857 *
858 * @bdev: struct block device to be flushed
859 *
860 * Invalidates all buffer-cache entries on a disk. It should be called
861 * when a disk has been changed -- either by a media change or online
862 * resize.
863 */
864static void flush_disk(struct block_device *bdev)
865{
866 if (__invalidate_device(bdev)) {
867 char name[BDEVNAME_SIZE] = "";
868
869 if (bdev->bd_disk)
870 disk_name(bdev->bd_disk, 0, name);
871 printk(KERN_WARNING "VFS: busy inodes on changed media or "
872 "resized disk %s\n", name);
873 }
874
875 if (!bdev->bd_disk)
876 return;
877 if (disk_partitionable(bdev->bd_disk))
878 bdev->bd_invalidated = 1;
879}
880
881/**
882 * check_disk_size_change - checks for disk size change and adjusts bdev size.
883 * @disk: struct gendisk to check
884 * @bdev: struct bdev to adjust.
885 *
 886 * This routine checks whether the bdev size matches the disk size
 887 * and adjusts the bdev size if it differs.
888 */
889void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
890{
891 loff_t disk_size, bdev_size;
892
893 disk_size = (loff_t)get_capacity(disk) << 9;
894 bdev_size = i_size_read(bdev->bd_inode);
895 if (disk_size != bdev_size) {
896 char name[BDEVNAME_SIZE];
897
898 disk_name(disk, 0, name);
899 printk(KERN_INFO
900 "%s: detected capacity change from %lld to %lld\n",
901 name, bdev_size, disk_size);
902 i_size_write(bdev->bd_inode, disk_size);
903 flush_disk(bdev);
904 }
905}
906EXPORT_SYMBOL(check_disk_size_change);
907
908/**
909 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
910 * @disk: struct gendisk to be revalidated
911 *
912 * This routine is a wrapper for lower-level driver's revalidate_disk
913 * call-backs. It is used to do common pre and post operations needed
914 * for all revalidate_disk operations.
915 */
916int revalidate_disk(struct gendisk *disk)
917{
918 struct block_device *bdev;
919 int ret = 0;
920
921 if (disk->fops->revalidate_disk)
922 ret = disk->fops->revalidate_disk(disk);
923
924 bdev = bdget_disk(disk, 0);
925 if (!bdev)
926 return ret;
927
928 mutex_lock(&bdev->bd_mutex);
929 check_disk_size_change(disk, bdev);
930 mutex_unlock(&bdev->bd_mutex);
931 bdput(bdev);
932 return ret;
933}
934EXPORT_SYMBOL(revalidate_disk);
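
A hedged sketch of the intended call site: a driver that detects an online resize updates its capacity and then calls the wrapper, which runs its ->revalidate_disk() callback and resizes the bdev inode via check_disk_size_change():

	/* hypothetical resize handler in a block driver */
	set_capacity(disk, new_nr_sectors);
	revalidate_disk(disk);
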
935
871/* 936/*
872 * This routine checks whether a removable media has been changed, 937 * This routine checks whether a removable media has been changed,
873 * and invalidates all buffer-cache-entries in that case. This 938 * and invalidates all buffer-cache-entries in that case. This
@@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev)
887 if (!bdops->media_changed(bdev->bd_disk)) 952 if (!bdops->media_changed(bdev->bd_disk))
888 return 0; 953 return 0;
889 954
890 if (__invalidate_device(bdev)) 955 flush_disk(bdev);
891 printk("VFS: busy inodes on changed media.\n");
892
893 if (bdops->revalidate_disk) 956 if (bdops->revalidate_disk)
894 bdops->revalidate_disk(bdev->bd_disk); 957 bdops->revalidate_disk(bdev->bd_disk);
895 if (bdev->bd_disk->minors > 1)
896 bdev->bd_invalidated = 1;
897 return 1; 958 return 1;
898} 959}
899 960
@@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
927 988
928static int do_open(struct block_device *bdev, struct file *file, int for_part) 989static int do_open(struct block_device *bdev, struct file *file, int for_part)
929{ 990{
930 struct module *owner = NULL;
931 struct gendisk *disk; 991 struct gendisk *disk;
992 struct hd_struct *part = NULL;
932 int ret; 993 int ret;
933 int part; 994 int partno;
934 int perm = 0; 995 int perm = 0;
935 996
936 if (file->f_mode & FMODE_READ) 997 if (file->f_mode & FMODE_READ)
@@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
948 1009
949 ret = -ENXIO; 1010 ret = -ENXIO;
950 file->f_mapping = bdev->bd_inode->i_mapping; 1011 file->f_mapping = bdev->bd_inode->i_mapping;
1012
951 lock_kernel(); 1013 lock_kernel();
952 disk = get_gendisk(bdev->bd_dev, &part); 1014
953 if (!disk) { 1015 disk = get_gendisk(bdev->bd_dev, &partno);
954 unlock_kernel(); 1016 if (!disk)
955 bdput(bdev); 1017 goto out_unlock_kernel;
956 return ret; 1018 part = disk_get_part(disk, partno);
957 } 1019 if (!part)
958 owner = disk->fops->owner; 1020 goto out_unlock_kernel;
959 1021
960 mutex_lock_nested(&bdev->bd_mutex, for_part); 1022 mutex_lock_nested(&bdev->bd_mutex, for_part);
961 if (!bdev->bd_openers) { 1023 if (!bdev->bd_openers) {
962 bdev->bd_disk = disk; 1024 bdev->bd_disk = disk;
1025 bdev->bd_part = part;
963 bdev->bd_contains = bdev; 1026 bdev->bd_contains = bdev;
964 if (!part) { 1027 if (!partno) {
965 struct backing_dev_info *bdi; 1028 struct backing_dev_info *bdi;
966 if (disk->fops->open) { 1029 if (disk->fops->open) {
967 ret = disk->fops->open(bdev->bd_inode, file); 1030 ret = disk->fops->open(bdev->bd_inode, file);
968 if (ret) 1031 if (ret)
969 goto out_first; 1032 goto out_clear;
970 } 1033 }
971 if (!bdev->bd_openers) { 1034 if (!bdev->bd_openers) {
972 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1035 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
@@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
978 if (bdev->bd_invalidated) 1041 if (bdev->bd_invalidated)
979 rescan_partitions(disk, bdev); 1042 rescan_partitions(disk, bdev);
980 } else { 1043 } else {
981 struct hd_struct *p;
982 struct block_device *whole; 1044 struct block_device *whole;
983 whole = bdget_disk(disk, 0); 1045 whole = bdget_disk(disk, 0);
984 ret = -ENOMEM; 1046 ret = -ENOMEM;
985 if (!whole) 1047 if (!whole)
986 goto out_first; 1048 goto out_clear;
987 BUG_ON(for_part); 1049 BUG_ON(for_part);
988 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); 1050 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1);
989 if (ret) 1051 if (ret)
990 goto out_first; 1052 goto out_clear;
991 bdev->bd_contains = whole; 1053 bdev->bd_contains = whole;
992 p = disk->part[part - 1];
993 bdev->bd_inode->i_data.backing_dev_info = 1054 bdev->bd_inode->i_data.backing_dev_info =
994 whole->bd_inode->i_data.backing_dev_info; 1055 whole->bd_inode->i_data.backing_dev_info;
995 if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { 1056 if (!(disk->flags & GENHD_FL_UP) ||
1057 !part || !part->nr_sects) {
996 ret = -ENXIO; 1058 ret = -ENXIO;
997 goto out_first; 1059 goto out_clear;
998 } 1060 }
999 kobject_get(&p->dev.kobj); 1061 bd_set_size(bdev, (loff_t)part->nr_sects << 9);
1000 bdev->bd_part = p;
1001 bd_set_size(bdev, (loff_t) p->nr_sects << 9);
1002 } 1062 }
1003 } else { 1063 } else {
1064 disk_put_part(part);
1004 put_disk(disk); 1065 put_disk(disk);
1005 module_put(owner); 1066 module_put(disk->fops->owner);
1067 part = NULL;
1068 disk = NULL;
1006 if (bdev->bd_contains == bdev) { 1069 if (bdev->bd_contains == bdev) {
1007 if (bdev->bd_disk->fops->open) { 1070 if (bdev->bd_disk->fops->open) {
1008 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); 1071 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
1009 if (ret) 1072 if (ret)
1010 goto out; 1073 goto out_unlock_bdev;
1011 } 1074 }
1012 if (bdev->bd_invalidated) 1075 if (bdev->bd_invalidated)
1013 rescan_partitions(bdev->bd_disk, bdev); 1076 rescan_partitions(bdev->bd_disk, bdev);
@@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1020 unlock_kernel(); 1083 unlock_kernel();
1021 return 0; 1084 return 0;
1022 1085
1023out_first: 1086 out_clear:
1024 bdev->bd_disk = NULL; 1087 bdev->bd_disk = NULL;
1088 bdev->bd_part = NULL;
1025 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1089 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1026 if (bdev != bdev->bd_contains) 1090 if (bdev != bdev->bd_contains)
1027 __blkdev_put(bdev->bd_contains, 1); 1091 __blkdev_put(bdev->bd_contains, 1);
1028 bdev->bd_contains = NULL; 1092 bdev->bd_contains = NULL;
1029 put_disk(disk); 1093 out_unlock_bdev:
1030 module_put(owner);
1031out:
1032 mutex_unlock(&bdev->bd_mutex); 1094 mutex_unlock(&bdev->bd_mutex);
1095 out_unlock_kernel:
1033 unlock_kernel(); 1096 unlock_kernel();
1034 if (ret) 1097
1035 bdput(bdev); 1098 disk_put_part(part);
1099 if (disk)
1100 module_put(disk->fops->owner);
1101 put_disk(disk);
1102 bdput(bdev);
1103
1036 return ret; 1104 return ret;
1037} 1105}
1038 1106
@@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1117 1185
1118 put_disk(disk); 1186 put_disk(disk);
1119 module_put(owner); 1187 module_put(owner);
1120 1188 disk_put_part(bdev->bd_part);
1121 if (bdev->bd_contains != bdev) { 1189 bdev->bd_part = NULL;
1122 kobject_put(&bdev->bd_part->dev.kobj);
1123 bdev->bd_part = NULL;
1124 }
1125 bdev->bd_disk = NULL; 1190 bdev->bd_disk = NULL;
1126 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1191 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1127 if (bdev != bdev->bd_contains) 1192 if (bdev != bdev->bd_contains)
@@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1197 1262
1198/** 1263/**
1199 * lookup_bdev - lookup a struct block_device by name 1264 * lookup_bdev - lookup a struct block_device by name
1265 * @pathname: special file representing the block device
1200 * 1266 *
1201 * @path: special file representing the block device 1267 * Get a reference to the blockdevice at @pathname in the current
1202 *
1203 * Get a reference to the blockdevice at @path in the current
1204 * namespace if possible and return it. Return ERR_PTR(error) 1268 * namespace if possible and return it. Return ERR_PTR(error)
1205 * otherwise. 1269 * otherwise.
1206 */ 1270 */
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 302e95c4af7e..fb98b3d847ed 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/msdos_fs.h> 8#include <linux/msdos_fs.h>
9#include <linux/blkdev.h>
9 10
10struct fatent_operations { 11struct fatent_operations {
11 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); 12 void (*ent_blocknr)(struct super_block *, int, int *, sector_t *);
@@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
535 struct fat_entry fatent; 536 struct fat_entry fatent;
536 struct buffer_head *bhs[MAX_BUF_PER_PAGE]; 537 struct buffer_head *bhs[MAX_BUF_PER_PAGE];
537 int i, err, nr_bhs; 538 int i, err, nr_bhs;
539 int first_cl = cluster;
538 540
539 nr_bhs = 0; 541 nr_bhs = 0;
540 fatent_init(&fatent); 542 fatent_init(&fatent);
@@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster)
551 goto error; 553 goto error;
552 } 554 }
553 555
556 /*
557 * Issue discard for the sectors we no longer care about,
558 * batching contiguous clusters into one request
559 */
560 if (cluster != fatent.entry + 1) {
561 int nr_clus = fatent.entry - first_cl + 1;
562
563 sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
564 nr_clus * sbi->sec_per_clus);
565 first_cl = cluster;
566 }
567
554 ops->ent_put(&fatent, FAT_ENT_FREE); 568 ops->ent_put(&fatent, FAT_ENT_FREE);
555 if (sbi->free_clusters != -1) { 569 if (sbi->free_clusters != -1) {
556 sbi->free_clusters++; 570 sbi->free_clusters++;
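
To make the batching concrete: the loop walks the cluster chain, and whenever the next entry is not the physical successor of the one just examined (cluster != fatent.entry + 1), it flushes the accumulated run with a single sb_issue_discard() and starts a new run at that cluster. Freeing the chain {10, 11, 12, 40}, for example, issues one discard covering clusters 10 to 12 when the jump to 40 is seen, with the tail of the chain flushed the same way.
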
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ecc3330972e5..7408227c49c9 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) =
120 * a pointer to that same buffer (for convenience). 120 * a pointer to that same buffer (for convenience).
121 */ 121 */
122 122
123char *disk_name(struct gendisk *hd, int part, char *buf) 123char *disk_name(struct gendisk *hd, int partno, char *buf)
124{ 124{
125 if (!part) 125 if (!partno)
126 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 126 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
127 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 127 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
128 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); 128 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
129 else 129 else
130 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); 130 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
131 131
132 return buf; 132 return buf;
133} 133}
134 134
135const char *bdevname(struct block_device *bdev, char *buf) 135const char *bdevname(struct block_device *bdev, char *buf)
136{ 136{
137 int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; 137 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
138 return disk_name(bdev->bd_disk, part, buf);
139} 138}
140 139
141EXPORT_SYMBOL(bdevname); 140EXPORT_SYMBOL(bdevname);
@@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev)
169 if (isdigit(state->name[strlen(state->name)-1])) 168 if (isdigit(state->name[strlen(state->name)-1]))
170 sprintf(state->name, "p"); 169 sprintf(state->name, "p");
171 170
172 state->limit = hd->minors; 171 state->limit = disk_max_parts(hd);
173 i = res = err = 0; 172 i = res = err = 0;
174 while (!res && check_part[i]) { 173 while (!res && check_part[i]) {
175 memset(&state->parts, 0, sizeof(state->parts)); 174 memset(&state->parts, 0, sizeof(state->parts));
@@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev,
204 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 203 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
205} 204}
206 205
207static ssize_t part_size_show(struct device *dev, 206ssize_t part_size_show(struct device *dev,
208 struct device_attribute *attr, char *buf) 207 struct device_attribute *attr, char *buf)
209{ 208{
210 struct hd_struct *p = dev_to_part(dev); 209 struct hd_struct *p = dev_to_part(dev);
211 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 210 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
212} 211}
213 212
214static ssize_t part_stat_show(struct device *dev, 213ssize_t part_stat_show(struct device *dev,
215 struct device_attribute *attr, char *buf) 214 struct device_attribute *attr, char *buf)
216{ 215{
217 struct hd_struct *p = dev_to_part(dev); 216 struct hd_struct *p = dev_to_part(dev);
217 int cpu;
218 218
219 preempt_disable(); 219 cpu = part_stat_lock();
220 part_round_stats(p); 220 part_round_stats(cpu, p);
221 preempt_enable(); 221 part_stat_unlock();
222 return sprintf(buf, 222 return sprintf(buf,
223 "%8lu %8lu %8llu %8u " 223 "%8lu %8lu %8llu %8u "
224 "%8lu %8lu %8llu %8u " 224 "%8lu %8lu %8llu %8u "
@@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev,
238} 238}
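
The old preempt_disable()/preempt_enable() pair becomes part_stat_lock()/part_stat_unlock(), which also returns the cpu that the per-cpu counters should be charged to. A hedged sketch of the general pattern; the ios[rw] counter bump is illustrative only:

	cpu = part_stat_lock();			/* pin to a cpu and get its index */
	part_round_stats(cpu, p);
	part_stat_inc(cpu, p, ios[rw]);		/* illustrative counter update */
	part_stat_unlock();
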
239 239
240#ifdef CONFIG_FAIL_MAKE_REQUEST 240#ifdef CONFIG_FAIL_MAKE_REQUEST
241static ssize_t part_fail_show(struct device *dev, 241ssize_t part_fail_show(struct device *dev,
242 struct device_attribute *attr, char *buf) 242 struct device_attribute *attr, char *buf)
243{ 243{
244 struct hd_struct *p = dev_to_part(dev); 244 struct hd_struct *p = dev_to_part(dev);
245 245
246 return sprintf(buf, "%d\n", p->make_it_fail); 246 return sprintf(buf, "%d\n", p->make_it_fail);
247} 247}
248 248
249static ssize_t part_fail_store(struct device *dev, 249ssize_t part_fail_store(struct device *dev,
250 struct device_attribute *attr, 250 struct device_attribute *attr,
251 const char *buf, size_t count) 251 const char *buf, size_t count)
252{ 252{
253 struct hd_struct *p = dev_to_part(dev); 253 struct hd_struct *p = dev_to_part(dev);
254 int i; 254 int i;
@@ -300,40 +300,34 @@ struct device_type part_type = {
300 .release = part_release, 300 .release = part_release,
301}; 301};
302 302
303static inline void partition_sysfs_add_subdir(struct hd_struct *p) 303static void delete_partition_rcu_cb(struct rcu_head *head)
304{
305 struct kobject *k;
306
307 k = kobject_get(&p->dev.kobj);
308 p->holder_dir = kobject_create_and_add("holders", k);
309 kobject_put(k);
310}
311
312static inline void disk_sysfs_add_subdirs(struct gendisk *disk)
313{ 304{
314 struct kobject *k; 305 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
315 306
316 k = kobject_get(&disk->dev.kobj); 307 part->start_sect = 0;
317 disk->holder_dir = kobject_create_and_add("holders", k); 308 part->nr_sects = 0;
318 disk->slave_dir = kobject_create_and_add("slaves", k); 309 part_stat_set_all(part, 0);
319 kobject_put(k); 310 put_device(part_to_dev(part));
320} 311}
321 312
322void delete_partition(struct gendisk *disk, int part) 313void delete_partition(struct gendisk *disk, int partno)
323{ 314{
324 struct hd_struct *p = disk->part[part-1]; 315 struct disk_part_tbl *ptbl = disk->part_tbl;
316 struct hd_struct *part;
325 317
326 if (!p) 318 if (partno >= ptbl->len)
327 return; 319 return;
328 if (!p->nr_sects) 320
321 part = ptbl->part[partno];
322 if (!part)
329 return; 323 return;
330 disk->part[part-1] = NULL; 324
331 p->start_sect = 0; 325 blk_free_devt(part_devt(part));
332 p->nr_sects = 0; 326 rcu_assign_pointer(ptbl->part[partno], NULL);
333 part_stat_set_all(p, 0); 327 kobject_put(part->holder_dir);
334 kobject_put(p->holder_dir); 328 device_del(part_to_dev(part));
335 device_del(&p->dev); 329
336 put_device(&p->dev); 330 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
337} 331}
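
The partition table is now RCU-protected: the slot is cleared with rcu_assign_pointer() and the final put_device() is deferred to a call_rcu() callback because lock-free readers may still hold the old pointer. A hedged sketch of the matching read side, close to what a disk_get_part()-style lookup is expected to do:

	rcu_read_lock();
	part = rcu_dereference(disk->part_tbl->part[partno]);
	if (part)
		get_device(part_to_dev(part));	/* pin it before leaving RCU */
	rcu_read_unlock();
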
338 332
339static ssize_t whole_disk_show(struct device *dev, 333static ssize_t whole_disk_show(struct device *dev,
@@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 338static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 339 whole_disk_show, NULL);
346 340
347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 341int add_partition(struct gendisk *disk, int partno,
342 sector_t start, sector_t len, int flags)
348{ 343{
349 struct hd_struct *p; 344 struct hd_struct *p;
345 dev_t devt = MKDEV(0, 0);
346 struct device *ddev = disk_to_dev(disk);
347 struct device *pdev;
348 struct disk_part_tbl *ptbl;
349 const char *dname;
350 int err; 350 int err;
351 351
352 err = disk_expand_part_tbl(disk, partno);
353 if (err)
354 return err;
355 ptbl = disk->part_tbl;
356
357 if (ptbl->part[partno])
358 return -EBUSY;
359
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 360 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 361 if (!p)
354 return -ENOMEM; 362 return -ENOMEM;
355 363
356 if (!init_part_stats(p)) { 364 if (!init_part_stats(p)) {
357 err = -ENOMEM; 365 err = -ENOMEM;
358 goto out0; 366 goto out_free;
359 } 367 }
368 pdev = part_to_dev(p);
369
360 p->start_sect = start; 370 p->start_sect = start;
361 p->nr_sects = len; 371 p->nr_sects = len;
362 p->partno = part; 372 p->partno = partno;
363 p->policy = disk->policy; 373 p->policy = get_disk_ro(disk);
364 374
365 if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) 375 dname = dev_name(ddev);
366 snprintf(p->dev.bus_id, BUS_ID_SIZE, 376 if (isdigit(dname[strlen(dname) - 1]))
367 "%sp%d", disk->dev.bus_id, part); 377 snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno);
368 else 378 else
369 snprintf(p->dev.bus_id, BUS_ID_SIZE, 379 snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno);
370 "%s%d", disk->dev.bus_id, part);
371 380
372 device_initialize(&p->dev); 381 device_initialize(pdev);
373 p->dev.devt = MKDEV(disk->major, disk->first_minor + part); 382 pdev->class = &block_class;
374 p->dev.class = &block_class; 383 pdev->type = &part_type;
375 p->dev.type = &part_type; 384 pdev->parent = ddev;
376 p->dev.parent = &disk->dev; 385
377 disk->part[part-1] = p; 386 err = blk_alloc_devt(p, &devt);
387 if (err)
388 goto out_free;
389 pdev->devt = devt;
378 390
379 /* delay uevent until 'holders' subdir is created */ 391 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 392 pdev->uevent_suppress = 1;
381 err = device_add(&p->dev); 393 err = device_add(pdev);
382 if (err) 394 if (err)
383 goto out1; 395 goto out_put;
384 partition_sysfs_add_subdir(p); 396
385 p->dev.uevent_suppress = 0; 397 err = -ENOMEM;
398 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
399 if (!p->holder_dir)
400 goto out_del;
401
402 pdev->uevent_suppress = 0;
386 if (flags & ADDPART_FLAG_WHOLEDISK) { 403 if (flags & ADDPART_FLAG_WHOLEDISK) {
387 err = device_create_file(&p->dev, &dev_attr_whole_disk); 404 err = device_create_file(pdev, &dev_attr_whole_disk);
388 if (err) 405 if (err)
389 goto out2; 406 goto out_del;
390 } 407 }
391 408
409 /* everything is up and running, commence */
410 INIT_RCU_HEAD(&p->rcu_head);
411 rcu_assign_pointer(ptbl->part[partno], p);
412
392 /* suppress uevent if the disk suppresses it */ 413 /* suppress uevent if the disk suppresses it */
393 if (!disk->dev.uevent_suppress) 414 if (!ddev->uevent_suppress)
394 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 415 kobject_uevent(&pdev->kobj, KOBJ_ADD);
395 416
396 return 0; 417 return 0;
397 418
398out2: 419out_free:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p); 420 kfree(p);
405 return err; 421 return err;
422out_del:
423 kobject_put(p->holder_dir);
424 device_del(pdev);
425out_put:
426 put_device(pdev);
427 blk_free_devt(devt);
428 return err;
406} 429}
407 430
408/* Not exported, helper to add_disk(). */ 431/* Not exported, helper to add_disk(). */
409void register_disk(struct gendisk *disk) 432void register_disk(struct gendisk *disk)
410{ 433{
434 struct device *ddev = disk_to_dev(disk);
411 struct block_device *bdev; 435 struct block_device *bdev;
436 struct disk_part_iter piter;
437 struct hd_struct *part;
412 char *s; 438 char *s;
413 int i;
414 struct hd_struct *p;
415 int err; 439 int err;
416 440
417 disk->dev.parent = disk->driverfs_dev; 441 ddev->parent = disk->driverfs_dev;
418 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
419 442
420 strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); 443 strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE);
421 /* ewww... some of these buggers have / in the name... */ 444 /* ewww... some of these buggers have / in the name... */
422 s = strchr(disk->dev.bus_id, '/'); 445 s = strchr(ddev->bus_id, '/');
423 if (s) 446 if (s)
424 *s = '!'; 447 *s = '!';
425 448
426 /* delay uevents until we have scanned the partition table */ 449 /* delay uevents until we have scanned the partition table */
427 disk->dev.uevent_suppress = 1; 450 ddev->uevent_suppress = 1;
428 451
429 if (device_add(&disk->dev)) 452 if (device_add(ddev))
430 return; 453 return;
431#ifndef CONFIG_SYSFS_DEPRECATED 454#ifndef CONFIG_SYSFS_DEPRECATED
432 err = sysfs_create_link(block_depr, &disk->dev.kobj, 455 err = sysfs_create_link(block_depr, &ddev->kobj,
433 kobject_name(&disk->dev.kobj)); 456 kobject_name(&ddev->kobj));
434 if (err) { 457 if (err) {
435 device_del(&disk->dev); 458 device_del(ddev);
436 return; 459 return;
437 } 460 }
438#endif 461#endif
439 disk_sysfs_add_subdirs(disk); 462 disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj);
463 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
440 464
441 /* No minors to use for partitions */ 465 /* No minors to use for partitions */
442 if (disk->minors == 1) 466 if (!disk_partitionable(disk))
443 goto exit; 467 goto exit;
444 468
445 /* No such device (e.g., media were just removed) */ 469 /* No such device (e.g., media were just removed) */
@@ -458,41 +482,57 @@ void register_disk(struct gendisk *disk)
458 482
459exit: 483exit:
460 /* announce disk after possible partitions are created */ 484 /* announce disk after possible partitions are created */
461 disk->dev.uevent_suppress = 0; 485 ddev->uevent_suppress = 0;
462 kobject_uevent(&disk->dev.kobj, KOBJ_ADD); 486 kobject_uevent(&ddev->kobj, KOBJ_ADD);
463 487
464 /* announce possible partitions */ 488 /* announce possible partitions */
465 for (i = 1; i < disk->minors; i++) { 489 disk_part_iter_init(&piter, disk, 0);
466 p = disk->part[i-1]; 490 while ((part = disk_part_iter_next(&piter)))
467 if (!p || !p->nr_sects) 491 kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD);
468 continue; 492 disk_part_iter_exit(&piter);
469 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
470 }
471} 493}
472 494
473int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 495int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
474{ 496{
497 struct disk_part_iter piter;
498 struct hd_struct *part;
475 struct parsed_partitions *state; 499 struct parsed_partitions *state;
476 int p, res; 500 int p, highest, res;
477 501
478 if (bdev->bd_part_count) 502 if (bdev->bd_part_count)
479 return -EBUSY; 503 return -EBUSY;
480 res = invalidate_partition(disk, 0); 504 res = invalidate_partition(disk, 0);
481 if (res) 505 if (res)
482 return res; 506 return res;
483 bdev->bd_invalidated = 0; 507
484 for (p = 1; p < disk->minors; p++) 508 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
485 delete_partition(disk, p); 509 while ((part = disk_part_iter_next(&piter)))
510 delete_partition(disk, part->partno);
511 disk_part_iter_exit(&piter);
512
486 if (disk->fops->revalidate_disk) 513 if (disk->fops->revalidate_disk)
487 disk->fops->revalidate_disk(disk); 514 disk->fops->revalidate_disk(disk);
515 check_disk_size_change(disk, bdev);
516 bdev->bd_invalidated = 0;
488 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 517 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
489 return 0; 518 return 0;
490 if (IS_ERR(state)) /* I/O error reading the partition table */ 519 if (IS_ERR(state)) /* I/O error reading the partition table */
491 return -EIO; 520 return -EIO;
492 521
493 /* tell userspace that the media / partition table may have changed */ 522 /* tell userspace that the media / partition table may have changed */
494 kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); 523 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
495 524
525 /* Detect the highest partition number and preallocate
526 * disk->part_tbl. This is an optimization and not strictly
527 * necessary.
528 */
529 for (p = 1, highest = 0; p < state->limit; p++)
530 if (state->parts[p].size)
531 highest = p;
532
533 disk_expand_part_tbl(disk, highest);
534
535 /* add partitions */
496 for (p = 1; p < state->limit; p++) { 536 for (p = 1; p < state->limit; p++) {
497 sector_t size = state->parts[p].size; 537 sector_t size = state->parts[p].size;
498 sector_t from = state->parts[p].from; 538 sector_t from = state->parts[p].from;
@@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector);
541 581
542void del_gendisk(struct gendisk *disk) 582void del_gendisk(struct gendisk *disk)
543{ 583{
544 int p; 584 struct disk_part_iter piter;
585 struct hd_struct *part;
545 586
546 /* invalidate stuff */ 587 /* invalidate stuff */
547 for (p = disk->minors - 1; p > 0; p--) { 588 disk_part_iter_init(&piter, disk,
548 invalidate_partition(disk, p); 589 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
549 delete_partition(disk, p); 590 while ((part = disk_part_iter_next(&piter))) {
591 invalidate_partition(disk, part->partno);
592 delete_partition(disk, part->partno);
550 } 593 }
594 disk_part_iter_exit(&piter);
595
551 invalidate_partition(disk, 0); 596 invalidate_partition(disk, 0);
552 disk->capacity = 0; 597 blk_free_devt(disk_to_dev(disk)->devt);
598 set_capacity(disk, 0);
553 disk->flags &= ~GENHD_FL_UP; 599 disk->flags &= ~GENHD_FL_UP;
554 unlink_gendisk(disk); 600 unlink_gendisk(disk);
555 disk_stat_set_all(disk, 0); 601 part_stat_set_all(&disk->part0, 0);
556 disk->stamp = 0; 602 disk->part0.stamp = 0;
557 603
558 kobject_put(disk->holder_dir); 604 kobject_put(disk->part0.holder_dir);
559 kobject_put(disk->slave_dir); 605 kobject_put(disk->slave_dir);
560 disk->driverfs_dev = NULL; 606 disk->driverfs_dev = NULL;
561#ifndef CONFIG_SYSFS_DEPRECATED 607#ifndef CONFIG_SYSFS_DEPRECATED
562 sysfs_remove_link(block_depr, disk->dev.bus_id); 608 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
563#endif 609#endif
564 device_del(&disk->dev); 610 device_del(disk_to_dev(disk));
565} 611}
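
All three call sites above use the new partition iterator in place of open-coded scans over disk->minors. A hedged sketch of the general pattern, where do_something() is hypothetical; the flags select whether empty partitions are included and whether iteration runs in reverse, as in del_gendisk() above:

	struct disk_part_iter piter;
	struct hd_struct *part;

	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter)))
		do_something(part);
	disk_part_iter_exit(&piter);
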
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
index 17ae8ecd9e8b..98dbe1a84528 100644
--- a/fs/partitions/check.h
+++ b/fs/partitions/check.h
@@ -5,15 +5,13 @@
5 * add_gd_partition adds a partitions details to the devices partition 5 * add_gd_partition adds a partitions details to the devices partition
6 * description. 6 * description.
7 */ 7 */
8enum { MAX_PART = 256 };
9
10struct parsed_partitions { 8struct parsed_partitions {
11 char name[BDEVNAME_SIZE]; 9 char name[BDEVNAME_SIZE];
12 struct { 10 struct {
13 sector_t from; 11 sector_t from;
14 sector_t size; 12 sector_t size;
15 int flags; 13 int flags;
16 } parts[MAX_PART]; 14 } parts[DISK_MAX_PARTS];
17 int next; 15 int next;
18 int limit; 16 int limit;
19}; 17};
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b68ec09399be..31474e89c59a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -180,6 +180,7 @@ unifdef-y += audit.h
180unifdef-y += auto_fs.h 180unifdef-y += auto_fs.h
181unifdef-y += auxvec.h 181unifdef-y += auxvec.h
182unifdef-y += binfmts.h 182unifdef-y += binfmts.h
183unifdef-y += blktrace_api.h
183unifdef-y += capability.h 184unifdef-y += capability.h
184unifdef-y += capi.h 185unifdef-y += capi.h
185unifdef-y += cciss_ioctl.h 186unifdef-y += cciss_ioctl.h
diff --git a/include/linux/ata.h b/include/linux/ata.h
index a26ebd25bac1..be00973d1a8c 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -88,6 +88,7 @@ enum {
88 ATA_ID_DLF = 128, 88 ATA_ID_DLF = 128,
89 ATA_ID_CSFO = 129, 89 ATA_ID_CSFO = 129,
90 ATA_ID_CFA_POWER = 160, 90 ATA_ID_CFA_POWER = 160,
91 ATA_ID_ROT_SPEED = 217,
91 ATA_ID_PIO4 = (1 << 1), 92 ATA_ID_PIO4 = (1 << 1),
92 93
93 ATA_ID_SERNO_LEN = 20, 94 ATA_ID_SERNO_LEN = 20,
@@ -700,6 +701,11 @@ static inline int ata_id_is_cfa(const u16 *id)
700 return 0; 701 return 0;
701} 702}
702 703
704static inline int ata_id_is_ssd(const u16 *id)
705{
706 return id[ATA_ID_ROT_SPEED] == 0x01;
707}
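
Word 217 of the IDENTIFY data carries the nominal media rotation rate; the value 0x01 means non-rotating (solid state) media. A hedged usage sketch:

	/* e.g. note that this device has no seek penalty */
	if (ata_id_is_ssd(dev->id))
		ata_dev_printk(dev, KERN_DEBUG, "solid state device detected\n");
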
708
703static inline int ata_drive_40wire(const u16 *dev_id) 709static inline int ata_drive_40wire(const u16 *dev_id)
704{ 710{
705 if (ata_id_is_sata(dev_id)) 711 if (ata_id_is_sata(dev_id))
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 0933a14e6414..ff5b4cf9e2da 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -26,21 +26,8 @@
26 26
27#ifdef CONFIG_BLOCK 27#ifdef CONFIG_BLOCK
28 28
29/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
30#include <asm/io.h> 29#include <asm/io.h>
31 30
32#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY)
33#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1))
34#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE)
35#else
36#define BIOVEC_VIRT_START_SIZE(x) 0
37#define BIOVEC_VIRT_OVERSIZE(x) 0
38#endif
39
40#ifndef BIO_VMERGE_BOUNDARY
41#define BIO_VMERGE_BOUNDARY 0
42#endif
43
44#define BIO_DEBUG 31#define BIO_DEBUG
45 32
46#ifdef BIO_DEBUG 33#ifdef BIO_DEBUG
@@ -88,25 +75,14 @@ struct bio {
88 /* Number of segments in this BIO after 75 /* Number of segments in this BIO after
89 * physical address coalescing is performed. 76 * physical address coalescing is performed.
90 */ 77 */
91 unsigned short bi_phys_segments; 78 unsigned int bi_phys_segments;
92
93 /* Number of segments after physical and DMA remapping
94 * hardware coalescing is performed.
95 */
96 unsigned short bi_hw_segments;
97 79
98 unsigned int bi_size; /* residual I/O count */ 80 unsigned int bi_size; /* residual I/O count */
99 81
100 /*
101 * To keep track of the max hw size, we account for the
102 * sizes of the first and last virtually mergeable segments
103 * in this bio
104 */
105 unsigned int bi_hw_front_size;
106 unsigned int bi_hw_back_size;
107
108 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ 82 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
109 83
84 unsigned int bi_comp_cpu; /* completion CPU */
85
110 struct bio_vec *bi_io_vec; /* the actual vec list */ 86 struct bio_vec *bi_io_vec; /* the actual vec list */
111 87
112 bio_end_io_t *bi_end_io; 88 bio_end_io_t *bi_end_io;
@@ -126,11 +102,14 @@ struct bio {
126#define BIO_UPTODATE 0 /* ok after I/O completion */ 102#define BIO_UPTODATE 0 /* ok after I/O completion */
127#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ 103#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */
128#define BIO_EOF 2 /* out-of-bounds error */ 104#define BIO_EOF 2 /* out-of-bounds error */
129#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ 105#define BIO_SEG_VALID 3 /* bi_phys_segments valid */
130#define BIO_CLONED 4 /* doesn't own data */ 106#define BIO_CLONED 4 /* doesn't own data */
131#define BIO_BOUNCED 5 /* bio is a bounce bio */ 107#define BIO_BOUNCED 5 /* bio is a bounce bio */
132#define BIO_USER_MAPPED 6 /* contains user pages */ 108#define BIO_USER_MAPPED 6 /* contains user pages */
133#define BIO_EOPNOTSUPP 7 /* not supported */ 109#define BIO_EOPNOTSUPP 7 /* not supported */
110#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
111#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
112#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
134#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 113#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
135 114
136/* 115/*
@@ -144,18 +123,31 @@ struct bio {
144/* 123/*
145 * bio bi_rw flags 124 * bio bi_rw flags
146 * 125 *
147 * bit 0 -- read (not set) or write (set) 126 * bit 0 -- data direction
127 * If not set, bio is a read from device. If set, it's a write to device.
148 * bit 1 -- rw-ahead when set 128 * bit 1 -- rw-ahead when set
149 * bit 2 -- barrier 129 * bit 2 -- barrier
130 * Insert a serialization point in the IO queue, forcing previously
 131 * submitted IO to be completed before this one is issued.
150 * bit 3 -- fail fast, don't want low level driver retries 132 * bit 3 -- fail fast, don't want low level driver retries
151 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately 133 * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
134 * Note that this does NOT indicate that the IO itself is sync, just
135 * that the block layer will not postpone issue of this IO by plugging.
136 * bit 5 -- metadata request
137 * Used for tracing to differentiate metadata and data IO. May also
 138 * get some preferential treatment in the IO scheduler.
139 * bit 6 -- discard sectors
140 * Informs the lower level device that this range of sectors is no longer
141 * used by the file system and may thus be freed by the device. Used
142 * for flash based storage.
152 */ 143 */
153#define BIO_RW 0 144#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */
154#define BIO_RW_AHEAD 1 145#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */
155#define BIO_RW_BARRIER 2 146#define BIO_RW_BARRIER 2
156#define BIO_RW_FAILFAST 3 147#define BIO_RW_FAILFAST 3
157#define BIO_RW_SYNC 4 148#define BIO_RW_SYNC 4
158#define BIO_RW_META 5 149#define BIO_RW_META 5
150#define BIO_RW_DISCARD 6
159 151
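
Drivers normally do not set BIO_RW_DISCARD by hand; discards are built through a helper. A hedged sketch, assuming the blkdev_issue_discard() interface added elsewhere in this series:

	/* advise the device that this sector range is no longer in use */
	err = blkdev_issue_discard(bdev, start_sector, nr_sects, GFP_KERNEL);
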
160/* 152/*
161 * upper 16 bits of bi_rw define the io priority of this bio 153 * upper 16 bits of bi_rw define the io priority of this bio
@@ -185,14 +177,15 @@ struct bio {
185#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) 177#define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST))
186#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) 178#define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD))
187#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) 179#define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META))
188#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) 180#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD))
181#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio))
189 182
190static inline unsigned int bio_cur_sectors(struct bio *bio) 183static inline unsigned int bio_cur_sectors(struct bio *bio)
191{ 184{
192 if (bio->bi_vcnt) 185 if (bio->bi_vcnt)
193 return bio_iovec(bio)->bv_len >> 9; 186 return bio_iovec(bio)->bv_len >> 9;
194 187 else /* dataless requests such as discard */
195 return 0; 188 return bio->bi_size >> 9;
196} 189}
197 190
198static inline void *bio_data(struct bio *bio) 191static inline void *bio_data(struct bio *bio)
@@ -236,8 +229,6 @@ static inline void *bio_data(struct bio *bio)
236 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) 229 ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
237#endif 230#endif
238 231
239#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \
240 ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
241#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ 232#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
242 (((addr1) | (mask)) == (((addr2) - 1) | (mask))) 233 (((addr1) | (mask)) == (((addr2) - 1) | (mask)))
243#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ 234#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
@@ -319,15 +310,14 @@ struct bio_pair {
319 atomic_t cnt; 310 atomic_t cnt;
320 int error; 311 int error;
321}; 312};
322extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, 313extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
323 int first_sectors);
324extern mempool_t *bio_split_pool;
325extern void bio_pair_release(struct bio_pair *dbio); 314extern void bio_pair_release(struct bio_pair *dbio);
326 315
327extern struct bio_set *bioset_create(int, int); 316extern struct bio_set *bioset_create(int, int);
328extern void bioset_free(struct bio_set *); 317extern void bioset_free(struct bio_set *);
329 318
330extern struct bio *bio_alloc(gfp_t, int); 319extern struct bio *bio_alloc(gfp_t, int);
320extern struct bio *bio_kmalloc(gfp_t, int);
331extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); 321extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
332extern void bio_put(struct bio *); 322extern void bio_put(struct bio *);
333extern void bio_free(struct bio *, struct bio_set *); 323extern void bio_free(struct bio *, struct bio_set *);
@@ -335,7 +325,6 @@ extern void bio_free(struct bio *, struct bio_set *);
335extern void bio_endio(struct bio *, int); 325extern void bio_endio(struct bio *, int);
336struct request_queue; 326struct request_queue;
337extern int bio_phys_segments(struct request_queue *, struct bio *); 327extern int bio_phys_segments(struct request_queue *, struct bio *);
338extern int bio_hw_segments(struct request_queue *, struct bio *);
339 328
340extern void __bio_clone(struct bio *, struct bio *); 329extern void __bio_clone(struct bio *, struct bio *);
341extern struct bio *bio_clone(struct bio *, gfp_t); 330extern struct bio *bio_clone(struct bio *, gfp_t);
@@ -346,12 +335,14 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
346extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, 335extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
347 unsigned int, unsigned int); 336 unsigned int, unsigned int);
348extern int bio_get_nr_vecs(struct block_device *); 337extern int bio_get_nr_vecs(struct block_device *);
338extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
349extern struct bio *bio_map_user(struct request_queue *, struct block_device *, 339extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
350 unsigned long, unsigned int, int); 340 unsigned long, unsigned int, int, gfp_t);
351struct sg_iovec; 341struct sg_iovec;
342struct rq_map_data;
352extern struct bio *bio_map_user_iov(struct request_queue *, 343extern struct bio *bio_map_user_iov(struct request_queue *,
353 struct block_device *, 344 struct block_device *,
354 struct sg_iovec *, int, int); 345 struct sg_iovec *, int, int, gfp_t);
355extern void bio_unmap_user(struct bio *); 346extern void bio_unmap_user(struct bio *);
356extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, 347extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
357 gfp_t); 348 gfp_t);
@@ -359,15 +350,25 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
359 gfp_t, int); 350 gfp_t, int);
360extern void bio_set_pages_dirty(struct bio *bio); 351extern void bio_set_pages_dirty(struct bio *bio);
361extern void bio_check_pages_dirty(struct bio *bio); 352extern void bio_check_pages_dirty(struct bio *bio);
362extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); 353extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
363extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, 354 unsigned long, unsigned int, int, gfp_t);
364 int, int); 355extern struct bio *bio_copy_user_iov(struct request_queue *,
356 struct rq_map_data *, struct sg_iovec *,
357 int, int, gfp_t);
365extern int bio_uncopy_user(struct bio *); 358extern int bio_uncopy_user(struct bio *);
366void zero_fill_bio(struct bio *bio); 359void zero_fill_bio(struct bio *bio);
367extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); 360extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
368extern unsigned int bvec_nr_vecs(unsigned short idx); 361extern unsigned int bvec_nr_vecs(unsigned short idx);
369 362
370/* 363/*
364 * Allow queuer to specify a completion CPU for this bio
365 */
366static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
367{
368 bio->bi_comp_cpu = cpu;
369}
370
371/*
371 * bio_set is used to allow other portions of the IO system to 372 * bio_set is used to allow other portions of the IO system to
372 * allocate their own private memory pools for bio and iovec structures. 373 * allocate their own private memory pools for bio and iovec structures.
373 * These memory pools in turn all allocate from the bio_slab 374 * These memory pools in turn all allocate from the bio_slab
@@ -445,6 +446,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
445 __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 446 __bio_kmap_irq((bio), (bio)->bi_idx, (flags))
446#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 447#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
447 448
449/*
450 * Check whether this bio carries any data or not. A NULL bio is allowed.
451 */
452static inline int bio_has_data(struct bio *bio)
453{
454 return bio && bio->bi_io_vec != NULL;
455}
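
A hedged sketch of the intended guard: with empty barriers and discards in flight, a bio may legitimately carry no payload, so code checks before touching the data path:

	/* skip segment accounting for payload-less bios */
	if (bio_has_data(bio))
		nr_segs = bio_phys_segments(q, bio);
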
456
448#if defined(CONFIG_BLK_DEV_INTEGRITY) 457#if defined(CONFIG_BLK_DEV_INTEGRITY)
449 458
450#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) 459#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
@@ -458,14 +467,7 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
458#define bip_for_each_vec(bvl, bip, i) \ 467#define bip_for_each_vec(bvl, bip, i) \
459 __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) 468 __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
460 469
461static inline int bio_integrity(struct bio *bio) 470#define bio_integrity(bio) (bio->bi_integrity != NULL)
462{
463#if defined(CONFIG_BLK_DEV_INTEGRITY)
464 return bio->bi_integrity != NULL;
465#else
466 return 0;
467#endif
468}
469 471
470extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); 472extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
471extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); 473extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 53ea933cf60b..a92d9e4ea96e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -16,7 +16,9 @@
16#include <linux/bio.h> 16#include <linux/bio.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/stringify.h> 18#include <linux/stringify.h>
19#include <linux/gfp.h>
19#include <linux/bsg.h> 20#include <linux/bsg.h>
21#include <linux/smp.h>
20 22
21#include <asm/scatterlist.h> 23#include <asm/scatterlist.h>
22 24
@@ -54,7 +56,6 @@ enum rq_cmd_type_bits {
54 REQ_TYPE_PM_SUSPEND, /* suspend request */ 56 REQ_TYPE_PM_SUSPEND, /* suspend request */
55 REQ_TYPE_PM_RESUME, /* resume request */ 57 REQ_TYPE_PM_RESUME, /* resume request */
56 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ 58 REQ_TYPE_PM_SHUTDOWN, /* shutdown request */
57 REQ_TYPE_FLUSH, /* flush request */
58 REQ_TYPE_SPECIAL, /* driver defined type */ 59 REQ_TYPE_SPECIAL, /* driver defined type */
59 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ 60 REQ_TYPE_LINUX_BLOCK, /* generic block layer message */
60 /* 61 /*
@@ -76,19 +77,18 @@ enum rq_cmd_type_bits {
76 * 77 *
77 */ 78 */
78enum { 79enum {
79 /*
80 * just examples for now
81 */
82 REQ_LB_OP_EJECT = 0x40, /* eject request */ 80 REQ_LB_OP_EJECT = 0x40, /* eject request */
83 REQ_LB_OP_FLUSH = 0x41, /* flush device */ 81 REQ_LB_OP_FLUSH = 0x41, /* flush request */
82 REQ_LB_OP_DISCARD = 0x42, /* discard sectors */
84}; 83};
85 84
86/* 85/*
87 * request type modified bits. first three bits match BIO_RW* bits, important 86 * request type modified bits. first two bits match BIO_RW* bits, important
88 */ 87 */
89enum rq_flag_bits { 88enum rq_flag_bits {
90 __REQ_RW, /* not set, read. set, write */ 89 __REQ_RW, /* not set, read. set, write */
91 __REQ_FAILFAST, /* no low level driver retries */ 90 __REQ_FAILFAST, /* no low level driver retries */
91 __REQ_DISCARD, /* request to discard sectors */
92 __REQ_SORTED, /* elevator knows about this request */ 92 __REQ_SORTED, /* elevator knows about this request */
93 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 93 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
94 __REQ_HARDBARRIER, /* may not be passed by drive either */ 94 __REQ_HARDBARRIER, /* may not be passed by drive either */
@@ -111,6 +111,7 @@ enum rq_flag_bits {
111}; 111};
112 112
113#define REQ_RW (1 << __REQ_RW) 113#define REQ_RW (1 << __REQ_RW)
114#define REQ_DISCARD (1 << __REQ_DISCARD)
114#define REQ_FAILFAST (1 << __REQ_FAILFAST) 115#define REQ_FAILFAST (1 << __REQ_FAILFAST)
115#define REQ_SORTED (1 << __REQ_SORTED) 116#define REQ_SORTED (1 << __REQ_SORTED)
116#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) 117#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER)
@@ -140,12 +141,14 @@ enum rq_flag_bits {
140 */ 141 */
141struct request { 142struct request {
142 struct list_head queuelist; 143 struct list_head queuelist;
143 struct list_head donelist; 144 struct call_single_data csd;
145 int cpu;
144 146
145 struct request_queue *q; 147 struct request_queue *q;
146 148
147 unsigned int cmd_flags; 149 unsigned int cmd_flags;
148 enum rq_cmd_type_bits cmd_type; 150 enum rq_cmd_type_bits cmd_type;
151 unsigned long atomic_flags;
149 152
150 /* Maintain bio traversal state for part by part I/O submission. 153 /* Maintain bio traversal state for part by part I/O submission.
151 * hard_* are block layer internals, no driver should touch them! 154 * hard_* are block layer internals, no driver should touch them!
@@ -190,13 +193,6 @@ struct request {
190 */ 193 */
191 unsigned short nr_phys_segments; 194 unsigned short nr_phys_segments;
192 195
193 /* Number of scatter-gather addr+len pairs after
194 * physical and DMA remapping hardware coalescing is performed.
195 * This is the number of scatter-gather entries the driver
196 * will actually have to deal with after DMA mapping is done.
197 */
198 unsigned short nr_hw_segments;
199
200 unsigned short ioprio; 196 unsigned short ioprio;
201 197
202 void *special; 198 void *special;
@@ -220,6 +216,8 @@ struct request {
220 void *data; 216 void *data;
221 void *sense; 217 void *sense;
222 218
219 unsigned long deadline;
220 struct list_head timeout_list;
223 unsigned int timeout; 221 unsigned int timeout;
224 int retries; 222 int retries;
225 223
@@ -233,6 +231,11 @@ struct request {
233 struct request *next_rq; 231 struct request *next_rq;
234}; 232};
235 233
234static inline unsigned short req_get_ioprio(struct request *req)
235{
236 return req->ioprio;
237}
238
236/* 239/*
237 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME 240 * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
238 * requests. Some step values could eventually be made generic. 241 * requests. Some step values could eventually be made generic.
@@ -252,6 +255,7 @@ typedef void (request_fn_proc) (struct request_queue *q);
252typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); 255typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
253typedef int (prep_rq_fn) (struct request_queue *, struct request *); 256typedef int (prep_rq_fn) (struct request_queue *, struct request *);
254typedef void (unplug_fn) (struct request_queue *); 257typedef void (unplug_fn) (struct request_queue *);
258typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
255 259
256struct bio_vec; 260struct bio_vec;
257struct bvec_merge_data { 261struct bvec_merge_data {
@@ -265,6 +269,15 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
265typedef void (prepare_flush_fn) (struct request_queue *, struct request *); 269typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
266typedef void (softirq_done_fn)(struct request *); 270typedef void (softirq_done_fn)(struct request *);
267typedef int (dma_drain_needed_fn)(struct request *); 271typedef int (dma_drain_needed_fn)(struct request *);
272typedef int (lld_busy_fn) (struct request_queue *q);
273
274enum blk_eh_timer_return {
275 BLK_EH_NOT_HANDLED,
276 BLK_EH_HANDLED,
277 BLK_EH_RESET_TIMER,
278};
279
280typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *);
268 281
269enum blk_queue_state { 282enum blk_queue_state {
270 Queue_down, 283 Queue_down,
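The three return values let a driver extend, claim, or decline a timed-out request. A hedged sketch of a handler built on them (the mydrv_* helpers are hypothetical; the blk_queue_rq_timed_out() setter appears further down in this patch):

static enum blk_eh_timer_return mydrv_timed_out(struct request *rq)
{
	struct mydrv_dev *dev = rq->q->queuedata;

	if (mydrv_hw_busy(dev))		/* command still in flight */
		return BLK_EH_RESET_TIMER;
	if (mydrv_hw_done(dev, rq))	/* raced with completion */
		return BLK_EH_HANDLED;
	return BLK_EH_NOT_HANDLED;	/* let the block layer abort it */
}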
@@ -307,10 +320,13 @@ struct request_queue
307 make_request_fn *make_request_fn; 320 make_request_fn *make_request_fn;
308 prep_rq_fn *prep_rq_fn; 321 prep_rq_fn *prep_rq_fn;
309 unplug_fn *unplug_fn; 322 unplug_fn *unplug_fn;
323 prepare_discard_fn *prepare_discard_fn;
310 merge_bvec_fn *merge_bvec_fn; 324 merge_bvec_fn *merge_bvec_fn;
311 prepare_flush_fn *prepare_flush_fn; 325 prepare_flush_fn *prepare_flush_fn;
312 softirq_done_fn *softirq_done_fn; 326 softirq_done_fn *softirq_done_fn;
327 rq_timed_out_fn *rq_timed_out_fn;
313 dma_drain_needed_fn *dma_drain_needed; 328 dma_drain_needed_fn *dma_drain_needed;
329 lld_busy_fn *lld_busy_fn;
314 330
315 /* 331 /*
316 * Dispatch queue sorting 332 * Dispatch queue sorting
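Together with the setters declared later in this patch, the new hooks get wired up at queue init time, roughly as below (a sketch; the mydrv_* callbacks are placeholders):

	blk_queue_rq_timed_out(q, mydrv_timed_out);
	blk_queue_rq_timeout(q, 30 * HZ);
	blk_queue_set_discard(q, mydrv_prepare_discard);
	blk_queue_lld_busy(q, mydrv_lld_busy);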
@@ -385,6 +401,10 @@ struct request_queue
385 unsigned int nr_sorted; 401 unsigned int nr_sorted;
386 unsigned int in_flight; 402 unsigned int in_flight;
387 403
404 unsigned int rq_timeout;
405 struct timer_list timeout;
406 struct list_head timeout_list;
407
388 /* 408 /*
389 * sg stuff 409 * sg stuff
390 */ 410 */
@@ -421,6 +441,10 @@ struct request_queue
421#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ 441#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */
422#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ 442#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */
423#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ 443#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */
444#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */
445#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
446#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
447#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
424 448
425static inline int queue_is_locked(struct request_queue *q) 449static inline int queue_is_locked(struct request_queue *q)
426{ 450{
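A driver that knows its device is an SSD, and whose queue can sit below a stacking driver, would advertise both flags at init time. A sketch, assuming the existing queue_flag_set_unlocked() helper:

	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
	queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);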
@@ -526,7 +550,10 @@ enum {
526#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 550#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
527#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 551#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
528#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 552#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
553#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
529#define blk_queue_flushing(q) ((q)->ordseq) 554#define blk_queue_flushing(q) ((q)->ordseq)
555#define blk_queue_stackable(q) \
556 test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
530 557
531#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) 558#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS)
532#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) 559#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -536,16 +563,18 @@ enum {
536#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) 563#define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST)
537#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) 564#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED)
538 565
539#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) 566#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq)))
540 567
541#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) 568#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
542#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) 569#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME)
543#define blk_pm_request(rq) \ 570#define blk_pm_request(rq) \
544 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) 571 (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
545 572
573#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1)
546#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) 574#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED)
547#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) 575#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER)
548#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 576#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
577#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
549#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 578#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
550#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) 579#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
551/* rq->queuelist of dequeued request must be list_empty() */ 580/* rq->queuelist of dequeued request must be list_empty() */
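With discards now accounted and mergeable like filesystem requests, a driver's request function typically branches on the new predicate. A hedged sketch (mydrv_do_discard/mydrv_do_rw are hypothetical):

static void mydrv_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = elv_next_request(q)) != NULL) {
		if (blk_discard_rq(rq))
			mydrv_do_discard(rq);	/* no payload, just a range */
		else if (blk_fs_request(rq))
			mydrv_do_rw(rq);
		else
			__blk_end_request(rq, -EIO, blk_rq_bytes(rq));
	}
}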
@@ -592,7 +621,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw)
592#define RQ_NOMERGE_FLAGS \ 621#define RQ_NOMERGE_FLAGS \
593 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) 622 (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
594#define rq_mergeable(rq) \ 623#define rq_mergeable(rq) \
595 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) 624 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
625 (blk_discard_rq(rq) || blk_fs_request((rq))))
596 626
597/* 627/*
598 * q->prep_rq_fn return values 628 * q->prep_rq_fn return values
@@ -637,6 +667,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio)
637} 667}
638#endif /* CONFIG_MMU */ 668#endif /* CONFIG_MMU */
639 669
670struct rq_map_data {
671 struct page **pages;
672 int page_order;
673 int nr_entries;
674};
675
640struct req_iterator { 676struct req_iterator {
641 int i; 677 int i;
642 struct bio *bio; 678 struct bio *bio;
@@ -664,6 +700,10 @@ extern void __blk_put_request(struct request_queue *, struct request *);
664extern struct request *blk_get_request(struct request_queue *, int, gfp_t); 700extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
665extern void blk_insert_request(struct request_queue *, struct request *, int, void *); 701extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
666extern void blk_requeue_request(struct request_queue *, struct request *); 702extern void blk_requeue_request(struct request_queue *, struct request *);
703extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
704extern int blk_lld_busy(struct request_queue *q);
705extern int blk_insert_cloned_request(struct request_queue *q,
706 struct request *rq);
667extern void blk_plug_device(struct request_queue *); 707extern void blk_plug_device(struct request_queue *);
668extern void blk_plug_device_unlocked(struct request_queue *); 708extern void blk_plug_device_unlocked(struct request_queue *);
669extern int blk_remove_plug(struct request_queue *); 709extern int blk_remove_plug(struct request_queue *);
@@ -705,11 +745,14 @@ extern void __blk_stop_queue(struct request_queue *q);
705extern void __blk_run_queue(struct request_queue *); 745extern void __blk_run_queue(struct request_queue *);
706extern void blk_run_queue(struct request_queue *); 746extern void blk_run_queue(struct request_queue *);
707extern void blk_start_queueing(struct request_queue *); 747extern void blk_start_queueing(struct request_queue *);
708extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); 748extern int blk_rq_map_user(struct request_queue *, struct request *,
749 struct rq_map_data *, void __user *, unsigned long,
750 gfp_t);
709extern int blk_rq_unmap_user(struct bio *); 751extern int blk_rq_unmap_user(struct bio *);
710extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 752extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
711extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 753extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
712 struct sg_iovec *, int, unsigned int); 754 struct rq_map_data *, struct sg_iovec *, int,
755 unsigned int, gfp_t);
713extern int blk_execute_rq(struct request_queue *, struct gendisk *, 756extern int blk_execute_rq(struct request_queue *, struct gendisk *,
714 struct request *, int); 757 struct request *, int);
715extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 758extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
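Callers that don't supply preallocated pages pass a NULL rq_map_data and now pick the allocation context explicitly. A hedged sketch of the updated calling convention, mirroring the usual map/execute/unmap pattern:

	struct bio *bio;
	int ret;

	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		return ret;
	bio = rq->bio;		/* save it: rq->bio may advance on completion */
	blk_execute_rq(q, disk, rq, 0);
	ret = blk_rq_unmap_user(bio);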
@@ -750,12 +793,15 @@ extern int __blk_end_request(struct request *rq, int error,
750extern int blk_end_bidi_request(struct request *rq, int error, 793extern int blk_end_bidi_request(struct request *rq, int error,
751 unsigned int nr_bytes, unsigned int bidi_bytes); 794 unsigned int nr_bytes, unsigned int bidi_bytes);
752extern void end_request(struct request *, int); 795extern void end_request(struct request *, int);
753extern void end_queued_request(struct request *, int);
754extern void end_dequeued_request(struct request *, int);
755extern int blk_end_request_callback(struct request *rq, int error, 796extern int blk_end_request_callback(struct request *rq, int error,
756 unsigned int nr_bytes, 797 unsigned int nr_bytes,
757 int (drv_callback)(struct request *)); 798 int (drv_callback)(struct request *));
758extern void blk_complete_request(struct request *); 799extern void blk_complete_request(struct request *);
800extern void __blk_complete_request(struct request *);
801extern void blk_abort_request(struct request *);
802extern void blk_abort_queue(struct request_queue *);
803extern void blk_update_request(struct request *rq, int error,
804 unsigned int nr_bytes);
759 805
760/* 806/*
761 * blk_end_request() takes bytes instead of sectors as a complete size. 807 * blk_end_request() takes bytes instead of sectors as a complete size.
@@ -790,12 +836,16 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
790extern int blk_queue_dma_drain(struct request_queue *q, 836extern int blk_queue_dma_drain(struct request_queue *q,
791 dma_drain_needed_fn *dma_drain_needed, 837 dma_drain_needed_fn *dma_drain_needed,
792 void *buf, unsigned int size); 838 void *buf, unsigned int size);
839extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
793extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); 840extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
794extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); 841extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
795extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); 842extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
796extern void blk_queue_dma_alignment(struct request_queue *, int); 843extern void blk_queue_dma_alignment(struct request_queue *, int);
797extern void blk_queue_update_dma_alignment(struct request_queue *, int); 844extern void blk_queue_update_dma_alignment(struct request_queue *, int);
798extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); 845extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
846extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
847extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
848extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
799extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 849extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
800extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 850extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
801extern int blk_do_ordered(struct request_queue *, struct request **); 851extern int blk_do_ordered(struct request_queue *, struct request **);
@@ -837,6 +887,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
837} 887}
838 888
839extern int blkdev_issue_flush(struct block_device *, sector_t *); 889extern int blkdev_issue_flush(struct block_device *, sector_t *);
890extern int blkdev_issue_discard(struct block_device *,
891 sector_t sector, sector_t nr_sects, gfp_t);
892
893static inline int sb_issue_discard(struct super_block *sb,
894 sector_t block, sector_t nr_blocks)
895{
896 block <<= (sb->s_blocksize_bits - 9);
897 nr_blocks <<= (sb->s_blocksize_bits - 9);
898 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL);
899}
840 900
841/* 901/*
842* command filter functions 902* command filter functions
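A filesystem frees a block range and hands it to the device in one call; devices without a discard hook report -EOPNOTSUPP, which callers can safely ignore. A sketch:

	err = sb_issue_discard(sb, block, nr_blocks);
	if (err == -EOPNOTSUPP)
		err = 0;	/* device cannot discard; not an error */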
@@ -874,6 +934,13 @@ static inline int queue_dma_alignment(struct request_queue *q)
874 return q ? q->dma_alignment : 511; 934 return q ? q->dma_alignment : 511;
875} 935}
876 936
937static inline int blk_rq_aligned(struct request_queue *q, void *addr,
938 unsigned int len)
939{
940 unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
941 return !((unsigned long)addr & alignment) && !(len & alignment);
942}
943
877/* assumes size > 256 */ 944/* assumes size > 256 */
878static inline unsigned int blksize_bits(unsigned int size) 945static inline unsigned int blksize_bits(unsigned int size)
879{ 946{
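The helper folds the DMA alignment and the pad mask into a single test. A typical use is choosing between mapping a buffer directly and copying it first; a hedged sketch (the bounce-copy helper is hypothetical):

	if (blk_rq_aligned(q, buf, len))
		ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	else
		ret = mydrv_copy_and_map(q, rq, buf, len);	/* bounce copy */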
@@ -900,7 +967,7 @@ static inline void put_dev_sector(Sector p)
900} 967}
901 968
902struct work_struct; 969struct work_struct;
903int kblockd_schedule_work(struct work_struct *work); 970int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
904void kblockd_flush_work(struct work_struct *work); 971void kblockd_flush_work(struct work_struct *work);
905 972
906#define MODULE_ALIAS_BLOCKDEV(major,minor) \ 973#define MODULE_ALIAS_BLOCKDEV(major,minor) \
@@ -945,49 +1012,19 @@ struct blk_integrity {
945 1012
946extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); 1013extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
947extern void blk_integrity_unregister(struct gendisk *); 1014extern void blk_integrity_unregister(struct gendisk *);
948extern int blk_integrity_compare(struct block_device *, struct block_device *); 1015extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
949extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); 1016extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
950extern int blk_rq_count_integrity_sg(struct request *); 1017extern int blk_rq_count_integrity_sg(struct request *);
951 1018
952static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) 1019static inline
953{ 1020struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
954 if (bi)
955 return bi->tuple_size;
956
957 return 0;
958}
959
960static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
961{ 1021{
962 return bdev->bd_disk->integrity; 1022 return bdev->bd_disk->integrity;
963} 1023}
964 1024
965static inline unsigned int bdev_get_tag_size(struct block_device *bdev) 1025static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
966{ 1026{
967 struct blk_integrity *bi = bdev_get_integrity(bdev); 1027 return disk->integrity;
968
969 if (bi)
970 return bi->tag_size;
971
972 return 0;
973}
974
975static inline int bdev_integrity_enabled(struct block_device *bdev, int rw)
976{
977 struct blk_integrity *bi = bdev_get_integrity(bdev);
978
979 if (bi == NULL)
980 return 0;
981
982 if (rw == READ && bi->verify_fn != NULL &&
983 (bi->flags & INTEGRITY_FLAG_READ))
984 return 1;
985
986 if (rw == WRITE && bi->generate_fn != NULL &&
987 (bi->flags & INTEGRITY_FLAG_WRITE))
988 return 1;
989
990 return 0;
991} 1028}
992 1029
993static inline int blk_integrity_rq(struct request *rq) 1030static inline int blk_integrity_rq(struct request *rq)
@@ -1004,7 +1041,7 @@ static inline int blk_integrity_rq(struct request *rq)
1004#define blk_rq_count_integrity_sg(a) (0) 1041#define blk_rq_count_integrity_sg(a) (0)
1005#define blk_rq_map_integrity_sg(a, b) (0) 1042#define blk_rq_map_integrity_sg(a, b) (0)
1006#define bdev_get_integrity(a) (0) 1043#define bdev_get_integrity(a) (0)
1007#define bdev_get_tag_size(a) (0) 1044#define blk_get_integrity(a) (0)
1008#define blk_integrity_compare(a, b) (0) 1045#define blk_integrity_compare(a, b) (0)
1009#define blk_integrity_register(a, b) (0) 1046#define blk_integrity_register(a, b) (0)
1010#define blk_integrity_unregister(a) do { } while (0); 1047#define blk_integrity_unregister(a) do { } while (0);
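With the comparison keyed on gendisks, a stacking driver can vet members before combining them. A hedged sketch, assuming a zero return means the integrity profiles are compatible:

	if (blk_integrity_compare(disk_a, disk_b) != 0)
		/* mismatched protection profiles; refuse to stack */
		return -EINVAL;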
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index d084b8d227a5..3a31eb506164 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -1,8 +1,10 @@
1#ifndef BLKTRACE_H 1#ifndef BLKTRACE_H
2#define BLKTRACE_H 2#define BLKTRACE_H
3 3
4#ifdef __KERNEL__
4#include <linux/blkdev.h> 5#include <linux/blkdev.h>
5#include <linux/relay.h> 6#include <linux/relay.h>
7#endif
6 8
7/* 9/*
8 * Trace categories 10 * Trace categories
@@ -21,6 +23,7 @@ enum blktrace_cat {
21 BLK_TC_NOTIFY = 1 << 10, /* special message */ 23 BLK_TC_NOTIFY = 1 << 10, /* special message */
22 BLK_TC_AHEAD = 1 << 11, /* readahead */ 24 BLK_TC_AHEAD = 1 << 11, /* readahead */
23 BLK_TC_META = 1 << 12, /* metadata */ 25 BLK_TC_META = 1 << 12, /* metadata */
26 BLK_TC_DISCARD = 1 << 13, /* discard requests */
24 27
25 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ 28 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */
26}; 29};
@@ -47,6 +50,7 @@ enum blktrace_act {
47 __BLK_TA_SPLIT, /* bio was split */ 50 __BLK_TA_SPLIT, /* bio was split */
48 __BLK_TA_BOUNCE, /* bio was bounced */ 51 __BLK_TA_BOUNCE, /* bio was bounced */
49 __BLK_TA_REMAP, /* bio was remapped */ 52 __BLK_TA_REMAP, /* bio was remapped */
53 __BLK_TA_ABORT, /* request aborted */
50}; 54};
51 55
52/* 56/*
@@ -77,6 +81,7 @@ enum blktrace_notify {
77#define BLK_TA_SPLIT (__BLK_TA_SPLIT) 81#define BLK_TA_SPLIT (__BLK_TA_SPLIT)
78#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) 82#define BLK_TA_BOUNCE (__BLK_TA_BOUNCE)
79#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) 83#define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE))
84#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE))
80 85
81#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) 86#define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY))
82#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) 87#define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY))
@@ -89,17 +94,17 @@ enum blktrace_notify {
89 * The trace itself 94 * The trace itself
90 */ 95 */
91struct blk_io_trace { 96struct blk_io_trace {
92 u32 magic; /* MAGIC << 8 | version */ 97 __u32 magic; /* MAGIC << 8 | version */
93 u32 sequence; /* event number */ 98 __u32 sequence; /* event number */
94 u64 time; /* in microseconds */ 99 __u64 time; /* in microseconds */
95 u64 sector; /* disk offset */ 100 __u64 sector; /* disk offset */
96 u32 bytes; /* transfer length */ 101 __u32 bytes; /* transfer length */
97 u32 action; /* what happened */ 102 __u32 action; /* what happened */
98 u32 pid; /* who did it */ 103 __u32 pid; /* who did it */
99 u32 device; /* device number */ 104 __u32 device; /* device number */
100 u32 cpu; /* on what cpu did it happen */ 105 __u32 cpu; /* on what cpu did it happen */
101 u16 error; /* completion error */ 106 __u16 error; /* completion error */
102 u16 pdu_len; /* length of data after this trace */ 107 __u16 pdu_len; /* length of data after this trace */
103}; 108};
104 109
105/* 110/*
@@ -117,6 +122,23 @@ enum {
117 Blktrace_stopped, 122 Blktrace_stopped,
118}; 123};
119 124
125#define BLKTRACE_BDEV_SIZE 32
126
127/*
128 * User setup structure passed with BLKTRACESTART
129 */
130struct blk_user_trace_setup {
131 char name[BLKTRACE_BDEV_SIZE]; /* output */
132 __u16 act_mask; /* input */
133 __u32 buf_size; /* input */
134 __u32 buf_nr; /* input */
135 __u64 start_lba;
136 __u64 end_lba;
137 __u32 pid;
138};
139
140#ifdef __KERNEL__
141#if defined(CONFIG_BLK_DEV_IO_TRACE)
120struct blk_trace { 142struct blk_trace {
121 int trace_state; 143 int trace_state;
122 struct rchan *rchan; 144 struct rchan *rchan;
@@ -133,21 +155,6 @@ struct blk_trace {
133 atomic_t dropped; 155 atomic_t dropped;
134}; 156};
135 157
136/*
137 * User setup structure passed with BLKTRACESTART
138 */
139struct blk_user_trace_setup {
140 char name[BDEVNAME_SIZE]; /* output */
141 u16 act_mask; /* input */
142 u32 buf_size; /* input */
143 u32 buf_nr; /* input */
144 u64 start_lba;
145 u64 end_lba;
146 u32 pid;
147};
148
149#ifdef __KERNEL__
150#if defined(CONFIG_BLK_DEV_IO_TRACE)
151extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); 158extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
152extern void blk_trace_shutdown(struct request_queue *); 159extern void blk_trace_shutdown(struct request_queue *);
153extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); 160extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *);
@@ -195,6 +202,9 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq,
195 if (likely(!bt)) 202 if (likely(!bt))
196 return; 203 return;
197 204
205 if (blk_discard_rq(rq))
206 rw |= (1 << BIO_RW_DISCARD);
207
198 if (blk_pc_request(rq)) { 208 if (blk_pc_request(rq)) {
199 what |= BLK_TC_ACT(BLK_TC_PC); 209 what |= BLK_TC_ACT(BLK_TC_PC);
200 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); 210 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd);
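Moving blk_user_trace_setup out of the __KERNEL__ guard, onto __u* types, and onto a self-contained name size makes the setup ioctls usable from exported headers. A hedged userspace sketch (buffer sizes are arbitrary; fd is an open block device):

#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>

static int start_blktrace(int fd)
{
	struct blk_user_trace_setup buts = {
		.act_mask = 0xffff,	/* all trace categories */
		.buf_size = 512 * 1024,
		.buf_nr	  = 4,
	};

	if (ioctl(fd, BLKTRACESETUP, &buts) < 0)
		return -1;
	return ioctl(fd, BLKTRACESTART);
}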
diff --git a/include/linux/device.h b/include/linux/device.h
index 4d8372d135df..246937c9cbc7 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -199,6 +199,11 @@ struct class {
199 struct class_private *p; 199 struct class_private *p;
200}; 200};
201 201
202struct class_dev_iter {
203 struct klist_iter ki;
204 const struct device_type *type;
205};
206
202extern struct kobject *sysfs_dev_block_kobj; 207extern struct kobject *sysfs_dev_block_kobj;
203extern struct kobject *sysfs_dev_char_kobj; 208extern struct kobject *sysfs_dev_char_kobj;
204extern int __must_check __class_register(struct class *class, 209extern int __must_check __class_register(struct class *class,
@@ -213,6 +218,13 @@ extern void class_unregister(struct class *class);
213 __class_register(class, &__key); \ 218 __class_register(class, &__key); \
214}) 219})
215 220
221extern void class_dev_iter_init(struct class_dev_iter *iter,
222 struct class *class,
223 struct device *start,
224 const struct device_type *type);
225extern struct device *class_dev_iter_next(struct class_dev_iter *iter);
226extern void class_dev_iter_exit(struct class_dev_iter *iter);
227
216extern int class_for_each_device(struct class *class, struct device *start, 228extern int class_for_each_device(struct class *class, struct device *start,
217 void *data, 229 void *data,
218 int (*fn)(struct device *dev, void *data)); 230 int (*fn)(struct device *dev, void *data));
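The iterator pins only the current device, so the loop body may sleep without holding a class-wide lock across the walk. A hedged sketch (assuming the exported block_class as the class being walked):

static void walk_block_class(void)
{
	struct class_dev_iter iter;
	struct device *dev;

	class_dev_iter_init(&iter, &block_class, NULL, NULL);
	while ((dev = class_dev_iter_next(&iter)))
		dev_printk(KERN_DEBUG, dev, "visited\n");
	class_dev_iter_exit(&iter);
}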
@@ -396,7 +408,7 @@ struct device {
396 spinlock_t devres_lock; 408 spinlock_t devres_lock;
397 struct list_head devres_head; 409 struct list_head devres_head;
398 410
399 struct list_head node; 411 struct klist_node knode_class;
400 struct class *class; 412 struct class *class;
401 dev_t devt; /* dev_t, creates the sysfs "dev" */ 413 dev_t devt; /* dev_t, creates the sysfs "dev" */
402 struct attribute_group **groups; /* optional groups */ 414 struct attribute_group **groups; /* optional groups */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 639624b55fbe..92f6f634e3e6 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -112,6 +112,7 @@ extern struct request *elv_latter_request(struct request_queue *, struct request
112extern int elv_register_queue(struct request_queue *q); 112extern int elv_register_queue(struct request_queue *q);
113extern void elv_unregister_queue(struct request_queue *q); 113extern void elv_unregister_queue(struct request_queue *q);
114extern int elv_may_queue(struct request_queue *, int); 114extern int elv_may_queue(struct request_queue *, int);
115extern void elv_abort_queue(struct request_queue *);
115extern void elv_completed_request(struct request_queue *, struct request *); 116extern void elv_completed_request(struct request_queue *, struct request *);
116extern int elv_set_request(struct request_queue *, struct request *, gfp_t); 117extern int elv_set_request(struct request_queue *, struct request *, gfp_t);
117extern void elv_put_request(struct request_queue *, struct request *); 118extern void elv_put_request(struct request_queue *, struct request *);
@@ -173,15 +174,15 @@ enum {
173#define rb_entry_rq(node) rb_entry((node), struct request, rb_node) 174#define rb_entry_rq(node) rb_entry((node), struct request, rb_node)
174 175
175/* 176/*
176 * Hack to reuse the donelist list_head as the fifo time holder while 177 * Hack to reuse the csd.list list_head as the fifo time holder while
177 * the request is in the io scheduler. Saves an unsigned long in rq. 178 * the request is in the io scheduler. Saves an unsigned long in rq.
178 */ 179 */
179#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) 180#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next)
180#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) 181#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp))
181#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) 182#define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist)
182#define rq_fifo_clear(rq) do { \ 183#define rq_fifo_clear(rq) do { \
183 list_del_init(&(rq)->queuelist); \ 184 list_del_init(&(rq)->queuelist); \
184 INIT_LIST_HEAD(&(rq)->donelist); \ 185 INIT_LIST_HEAD(&(rq)->csd.list); \
185 } while (0) 186 } while (0)
186 187
187/* 188/*
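The reuse is safe because a request parked in an io scheduler is never simultaneously on a completion list. Deadline-style code stamps and tests the expiry roughly like this (dd->fifo_expire and fifo_expired are illustrative):

	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
	/* ... later, when dispatching ... */
	if (time_after_eq(jiffies, rq_fifo_time(rq)))
		fifo_expired = 1;	/* front request waited long enough */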
diff --git a/include/linux/fd.h b/include/linux/fd.h
index b6bd41d2b460..f5d194af07a8 100644
--- a/include/linux/fd.h
+++ b/include/linux/fd.h
@@ -15,10 +15,16 @@ struct floppy_struct {
15 sect, /* sectors per track */ 15 sect, /* sectors per track */
16 head, /* nr of heads */ 16 head, /* nr of heads */
17 track, /* nr of tracks */ 17 track, /* nr of tracks */
18 stretch; /* !=0 means double track steps */ 18 stretch; /* bit 0 !=0 means double track steps */
19 /* bit 1 != 0 means swap sides */
20 /* bits 2..9 give the first sector */
21 /* number (the LSB is flipped) */
19#define FD_STRETCH 1 22#define FD_STRETCH 1
20#define FD_SWAPSIDES 2 23#define FD_SWAPSIDES 2
21#define FD_ZEROBASED 4 24#define FD_ZEROBASED 4
25#define FD_SECTBASEMASK 0x3FC
26#define FD_MKSECTBASE(s) (((s) ^ 1) << 2)
27#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1)
22 28
23 unsigned char gap, /* gap1 size */ 29 unsigned char gap, /* gap1 size */
24 30
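The flipped LSB means the common first-sector number 1 encodes to 0, so old zero-initialized structures stay valid. A quick round trip of the encoding:

	struct floppy_struct f = { .stretch = FD_MKSECTBASE(1) };	/* encodes to 0 */
	int first_sector = FD_SECTBASE(&f);				/* yields 1 again */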
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 580b513668fe..32477e8872d5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -86,7 +86,9 @@ extern int dir_notify_enable;
86#define READ_META (READ | (1 << BIO_RW_META)) 86#define READ_META (READ | (1 << BIO_RW_META))
87#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) 87#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC))
88#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) 88#define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC))
89#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) 89#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER))
90#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD)
91#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER))
90 92
91#define SEL_IN 1 93#define SEL_IN 1
92#define SEL_OUT 2 94#define SEL_OUT 2
@@ -222,6 +224,7 @@ extern int dir_notify_enable;
222#define BLKTRACESTART _IO(0x12,116) 224#define BLKTRACESTART _IO(0x12,116)
223#define BLKTRACESTOP _IO(0x12,117) 225#define BLKTRACESTOP _IO(0x12,117)
224#define BLKTRACETEARDOWN _IO(0x12,118) 226#define BLKTRACETEARDOWN _IO(0x12,118)
227#define BLKDISCARD _IO(0x12,119)
225 228
226#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 229#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
227#define FIBMAP _IO(0x00,1) /* bmap access */ 230#define FIBMAP _IO(0x00,1) /* bmap access */
@@ -1682,6 +1685,7 @@ extern void chrdev_show(struct seq_file *,off_t);
1682 1685
1683/* fs/block_dev.c */ 1686/* fs/block_dev.c */
1684#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ 1687#define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */
1688#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */
1685 1689
1686#ifdef CONFIG_BLOCK 1690#ifdef CONFIG_BLOCK
1687#define BLKDEV_MAJOR_HASH_SIZE 255 1691#define BLKDEV_MAJOR_HASH_SIZE 255
@@ -1718,6 +1722,9 @@ extern int fs_may_remount_ro(struct super_block *);
1718 */ 1722 */
1719#define bio_data_dir(bio) ((bio)->bi_rw & 1) 1723#define bio_data_dir(bio) ((bio)->bi_rw & 1)
1720 1724
1725extern void check_disk_size_change(struct gendisk *disk,
1726 struct block_device *bdev);
1727extern int revalidate_disk(struct gendisk *);
1721extern int check_disk_change(struct block_device *); 1728extern int check_disk_change(struct block_device *);
1722extern int __invalidate_device(struct block_device *); 1729extern int __invalidate_device(struct block_device *);
1723extern int invalidate_partition(struct gendisk *, int); 1730extern int invalidate_partition(struct gendisk *, int);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index be4f5e5bfe06..206cdf96c3a7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -11,12 +11,15 @@
11 11
12#include <linux/types.h> 12#include <linux/types.h>
13#include <linux/kdev_t.h> 13#include <linux/kdev_t.h>
14#include <linux/rcupdate.h>
14 15
15#ifdef CONFIG_BLOCK 16#ifdef CONFIG_BLOCK
16 17
17#define kobj_to_dev(k) container_of(k, struct device, kobj) 18#define kobj_to_dev(k) container_of((k), struct device, kobj)
18#define dev_to_disk(device) container_of(device, struct gendisk, dev) 19#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev)
19#define dev_to_part(device) container_of(device, struct hd_struct, dev) 20#define dev_to_part(device) container_of((device), struct hd_struct, __dev)
21#define disk_to_dev(disk) (&(disk)->part0.__dev)
22#define part_to_dev(part) (&((part)->__dev))
20 23
21extern struct device_type part_type; 24extern struct device_type part_type;
22extern struct kobject *block_depr; 25extern struct kobject *block_depr;
@@ -55,6 +58,9 @@ enum {
55 UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ 58 UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */
56}; 59};
57 60
61#define DISK_MAX_PARTS 256
62#define DISK_NAME_LEN 32
63
58#include <linux/major.h> 64#include <linux/major.h>
59#include <linux/device.h> 65#include <linux/device.h>
60#include <linux/smp.h> 66#include <linux/smp.h>
@@ -87,7 +93,7 @@ struct disk_stats {
87struct hd_struct { 93struct hd_struct {
88 sector_t start_sect; 94 sector_t start_sect;
89 sector_t nr_sects; 95 sector_t nr_sects;
90 struct device dev; 96 struct device __dev;
91 struct kobject *holder_dir; 97 struct kobject *holder_dir;
92 int policy, partno; 98 int policy, partno;
93#ifdef CONFIG_FAIL_MAKE_REQUEST 99#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -100,6 +106,7 @@ struct hd_struct {
100#else 106#else
101 struct disk_stats dkstats; 107 struct disk_stats dkstats;
102#endif 108#endif
109 struct rcu_head rcu_head;
103}; 110};
104 111
105#define GENHD_FL_REMOVABLE 1 112#define GENHD_FL_REMOVABLE 1
@@ -108,100 +115,148 @@ struct hd_struct {
108#define GENHD_FL_CD 8 115#define GENHD_FL_CD 8
109#define GENHD_FL_UP 16 116#define GENHD_FL_UP 16
110#define GENHD_FL_SUPPRESS_PARTITION_INFO 32 117#define GENHD_FL_SUPPRESS_PARTITION_INFO 32
111#define GENHD_FL_FAIL 64 118#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */
119
120#define BLK_SCSI_MAX_CMDS (256)
121#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8))
122
123struct blk_scsi_cmd_filter {
124 unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
125 unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
126 struct kobject kobj;
127};
128
129struct disk_part_tbl {
130 struct rcu_head rcu_head;
131 int len;
132 struct hd_struct *part[];
133};
112 134
113struct gendisk { 135struct gendisk {
136 /* major, first_minor and minors are input parameters only,
137 * don't use directly. Use disk_devt() and disk_max_parts().
138 */
114 int major; /* major number of driver */ 139 int major; /* major number of driver */
115 int first_minor; 140 int first_minor;
116 int minors; /* maximum number of minors, =1 for 141 int minors; /* maximum number of minors, =1 for
117 * disks that can't be partitioned. */ 142 * disks that can't be partitioned. */
118 char disk_name[32]; /* name of major driver */ 143
119 struct hd_struct **part; /* [indexed by minor] */ 144 char disk_name[DISK_NAME_LEN]; /* name of major driver */
145
146 /* Array of pointers to partitions indexed by partno.
147 * Protected with matching bdev lock but stat and other
148 * non-critical accesses use RCU. Always access through
149 * helpers.
150 */
151 struct disk_part_tbl *part_tbl;
152 struct hd_struct part0;
153
120 struct block_device_operations *fops; 154 struct block_device_operations *fops;
121 struct request_queue *queue; 155 struct request_queue *queue;
122 void *private_data; 156 void *private_data;
123 sector_t capacity;
124 157
125 int flags; 158 int flags;
126 struct device *driverfs_dev; // FIXME: remove 159 struct device *driverfs_dev; // FIXME: remove
127 struct device dev;
128 struct kobject *holder_dir;
129 struct kobject *slave_dir; 160 struct kobject *slave_dir;
130 161
131 struct timer_rand_state *random; 162 struct timer_rand_state *random;
132 int policy;
133 163
134 atomic_t sync_io; /* RAID */ 164 atomic_t sync_io; /* RAID */
135 unsigned long stamp;
136 int in_flight;
137#ifdef CONFIG_SMP
138 struct disk_stats *dkstats;
139#else
140 struct disk_stats dkstats;
141#endif
142 struct work_struct async_notify; 165 struct work_struct async_notify;
143#ifdef CONFIG_BLK_DEV_INTEGRITY 166#ifdef CONFIG_BLK_DEV_INTEGRITY
144 struct blk_integrity *integrity; 167 struct blk_integrity *integrity;
145#endif 168#endif
169 int node_id;
146}; 170};
147 171
148/* 172static inline struct gendisk *part_to_disk(struct hd_struct *part)
149 * Macros to operate on percpu disk statistics:
150 *
151 * The __ variants should only be called in critical sections. The full
152 * variants disable/enable preemption.
153 */
154static inline struct hd_struct *get_part(struct gendisk *gendiskp,
155 sector_t sector)
156{ 173{
157 struct hd_struct *part; 174 if (likely(part)) {
158 int i; 175 if (part->partno)
159 for (i = 0; i < gendiskp->minors - 1; i++) { 176 return dev_to_disk(part_to_dev(part)->parent);
160 part = gendiskp->part[i]; 177 else
161 if (part && part->start_sect <= sector 178 return dev_to_disk(part_to_dev(part));
162 && sector < part->start_sect + part->nr_sects)
163 return part;
164 } 179 }
165 return NULL; 180 return NULL;
166} 181}
167 182
168#ifdef CONFIG_SMP 183static inline int disk_max_parts(struct gendisk *disk)
169#define __disk_stat_add(gendiskp, field, addnd) \ 184{
170 (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) 185 if (disk->flags & GENHD_FL_EXT_DEVT)
186 return DISK_MAX_PARTS;
187 return disk->minors;
188}
171 189
172#define disk_stat_read(gendiskp, field) \ 190static inline bool disk_partitionable(struct gendisk *disk)
173({ \ 191{
174 typeof(gendiskp->dkstats->field) res = 0; \ 192 return disk_max_parts(disk) > 1;
175 int i; \ 193}
176 for_each_possible_cpu(i) \
177 res += per_cpu_ptr(gendiskp->dkstats, i)->field; \
178 res; \
179})
180 194
181static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { 195static inline dev_t disk_devt(struct gendisk *disk)
182 int i; 196{
197 return disk_to_dev(disk)->devt;
198}
183 199
184 for_each_possible_cpu(i) 200static inline dev_t part_devt(struct hd_struct *part)
185 memset(per_cpu_ptr(gendiskp->dkstats, i), value, 201{
186 sizeof(struct disk_stats)); 202 return part_to_dev(part)->devt;
187} 203}
188 204
189#define __part_stat_add(part, field, addnd) \ 205extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno);
190 (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
191 206
192#define __all_stat_add(gendiskp, part, field, addnd, sector) \ 207static inline void disk_put_part(struct hd_struct *part)
193({ \ 208{
194 if (part) \ 209 if (likely(part))
195 __part_stat_add(part, field, addnd); \ 210 put_device(part_to_dev(part));
196 __disk_stat_add(gendiskp, field, addnd); \ 211}
197}) 212
213/*
214 * Smarter partition iterator without context limits.
215 */
216#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */
217#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */
218#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */
219
220struct disk_part_iter {
221 struct gendisk *disk;
222 struct hd_struct *part;
223 int idx;
224 unsigned int flags;
225};
226
227extern void disk_part_iter_init(struct disk_part_iter *piter,
228 struct gendisk *disk, unsigned int flags);
229extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter);
230extern void disk_part_iter_exit(struct disk_part_iter *piter);
231
232extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk,
233 sector_t sector);
234
235/*
236 * Macros to operate on percpu disk statistics:
237 *
238 * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters
239 * and should be called between disk_stat_lock() and
240 * disk_stat_unlock().
241 *
242 * part_stat_read() can be called at any time.
243 *
244 * part_stat_{add|set_all}() and {init|free}_part_stats are for
245 * internal use only.
246 */
247#ifdef CONFIG_SMP
248#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); })
249#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0)
250
251#define __part_stat_add(cpu, part, field, addnd) \
252 (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd))
198 253
199#define part_stat_read(part, field) \ 254#define part_stat_read(part, field) \
200({ \ 255({ \
201 typeof(part->dkstats->field) res = 0; \ 256 typeof((part)->dkstats->field) res = 0; \
202 int i; \ 257 int i; \
203 for_each_possible_cpu(i) \ 258 for_each_possible_cpu(i) \
204 res += per_cpu_ptr(part->dkstats, i)->field; \ 259 res += per_cpu_ptr((part)->dkstats, i)->field; \
205 res; \ 260 res; \
206}) 261})
207 262
@@ -213,171 +268,107 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
213 memset(per_cpu_ptr(part->dkstats, i), value, 268 memset(per_cpu_ptr(part->dkstats, i), value,
214 sizeof(struct disk_stats)); 269 sizeof(struct disk_stats));
215} 270}
216
217#else /* !CONFIG_SMP */
218#define __disk_stat_add(gendiskp, field, addnd) \
219 (gendiskp->dkstats.field += addnd)
220#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field)
221 271
222static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) 272static inline int init_part_stats(struct hd_struct *part)
223{ 273{
224 memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); 274 part->dkstats = alloc_percpu(struct disk_stats);
275 if (!part->dkstats)
276 return 0;
277 return 1;
225} 278}
226 279
227#define __part_stat_add(part, field, addnd) \ 280static inline void free_part_stats(struct hd_struct *part)
228 (part->dkstats.field += addnd)
229
230#define __all_stat_add(gendiskp, part, field, addnd, sector) \
231({ \
232 if (part) \
233 part->dkstats.field += addnd; \
234 __disk_stat_add(gendiskp, field, addnd); \
235})
236
237#define part_stat_read(part, field) (part->dkstats.field)
238
239static inline void part_stat_set_all(struct hd_struct *part, int value)
240{ 281{
241 memset(&part->dkstats, value, sizeof(struct disk_stats)); 282 free_percpu(part->dkstats);
242} 283}
243 284
244#endif /* CONFIG_SMP */ 285#else /* !CONFIG_SMP */
286#define part_stat_lock() ({ rcu_read_lock(); 0; })
287#define part_stat_unlock() rcu_read_unlock()
245 288
246#define disk_stat_add(gendiskp, field, addnd) \ 289#define __part_stat_add(cpu, part, field, addnd) \
247 do { \ 290 ((part)->dkstats.field += addnd)
248 preempt_disable(); \ 291
249 __disk_stat_add(gendiskp, field, addnd); \ 292#define part_stat_read(part, field) ((part)->dkstats.field)
250 preempt_enable(); \
251 } while (0)
252
253#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1)
254#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1)
255
256#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1)
257#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1)
258
259#define __disk_stat_sub(gendiskp, field, subnd) \
260 __disk_stat_add(gendiskp, field, -subnd)
261#define disk_stat_sub(gendiskp, field, subnd) \
262 disk_stat_add(gendiskp, field, -subnd)
263
264#define part_stat_add(gendiskp, field, addnd) \
265 do { \
266 preempt_disable(); \
267 __part_stat_add(gendiskp, field, addnd);\
268 preempt_enable(); \
269 } while (0)
270
271#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
272#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
273
274#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
275#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
276
277#define __part_stat_sub(gendiskp, field, subnd) \
278 __part_stat_add(gendiskp, field, -subnd)
279#define part_stat_sub(gendiskp, field, subnd) \
280 part_stat_add(gendiskp, field, -subnd)
281
282#define all_stat_add(gendiskp, part, field, addnd, sector) \
283 do { \
284 preempt_disable(); \
285 __all_stat_add(gendiskp, part, field, addnd, sector); \
286 preempt_enable(); \
287 } while (0)
288
289#define __all_stat_dec(gendiskp, field, sector) \
290 __all_stat_add(gendiskp, field, -1, sector)
291#define all_stat_dec(gendiskp, field, sector) \
292 all_stat_add(gendiskp, field, -1, sector)
293
294#define __all_stat_inc(gendiskp, part, field, sector) \
295 __all_stat_add(gendiskp, part, field, 1, sector)
296#define all_stat_inc(gendiskp, part, field, sector) \
297 all_stat_add(gendiskp, part, field, 1, sector)
298
299#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
300 __all_stat_add(gendiskp, part, field, -subnd, sector)
301#define all_stat_sub(gendiskp, part, field, subnd, sector) \
302 all_stat_add(gendiskp, part, field, -subnd, sector)
303
304/* Inlines to alloc and free disk stats in struct gendisk */
305#ifdef CONFIG_SMP
306static inline int init_disk_stats(struct gendisk *disk)
307{
308 disk->dkstats = alloc_percpu(struct disk_stats);
309 if (!disk->dkstats)
310 return 0;
311 return 1;
312}
313 293
314static inline void free_disk_stats(struct gendisk *disk) 294static inline void part_stat_set_all(struct hd_struct *part, int value)
315{ 295{
316 free_percpu(disk->dkstats); 296 memset(&part->dkstats, value, sizeof(struct disk_stats));
317} 297}
318 298
319static inline int init_part_stats(struct hd_struct *part) 299static inline int init_part_stats(struct hd_struct *part)
320{ 300{
321 part->dkstats = alloc_percpu(struct disk_stats);
322 if (!part->dkstats)
323 return 0;
324 return 1; 301 return 1;
325} 302}
326 303
327static inline void free_part_stats(struct hd_struct *part) 304static inline void free_part_stats(struct hd_struct *part)
328{ 305{
329 free_percpu(part->dkstats);
330}
331
332#else /* CONFIG_SMP */
333static inline int init_disk_stats(struct gendisk *disk)
334{
335 return 1;
336} 306}
337 307
338static inline void free_disk_stats(struct gendisk *disk) 308#endif /* CONFIG_SMP */
339{
340}
341 309
342static inline int init_part_stats(struct hd_struct *part) 310#define part_stat_add(cpu, part, field, addnd) do { \
311 __part_stat_add((cpu), (part), field, addnd); \
312 if ((part)->partno) \
313 __part_stat_add((cpu), &part_to_disk((part))->part0, \
314 field, addnd); \
315} while (0)
316
317#define part_stat_dec(cpu, gendiskp, field) \
318 part_stat_add(cpu, gendiskp, field, -1)
319#define part_stat_inc(cpu, gendiskp, field) \
320 part_stat_add(cpu, gendiskp, field, 1)
321#define part_stat_sub(cpu, gendiskp, field, subnd) \
322 part_stat_add(cpu, gendiskp, field, -subnd)
323
324static inline void part_inc_in_flight(struct hd_struct *part)
343{ 325{
344 return 1; 326 part->in_flight++;
327 if (part->partno)
328 part_to_disk(part)->part0.in_flight++;
345} 329}
346 330
347static inline void free_part_stats(struct hd_struct *part) 331static inline void part_dec_in_flight(struct hd_struct *part)
348{ 332{
333 part->in_flight--;
334 if (part->partno)
335 part_to_disk(part)->part0.in_flight--;
349} 336}
350#endif /* CONFIG_SMP */
351 337
352/* drivers/block/ll_rw_blk.c */ 338/* drivers/block/ll_rw_blk.c */
353extern void disk_round_stats(struct gendisk *disk); 339extern void part_round_stats(int cpu, struct hd_struct *part);
354extern void part_round_stats(struct hd_struct *part);
355 340
356/* drivers/block/genhd.c */ 341/* drivers/block/genhd.c */
357extern int get_blkdev_list(char *, int); 342extern int get_blkdev_list(char *, int);
358extern void add_disk(struct gendisk *disk); 343extern void add_disk(struct gendisk *disk);
359extern void del_gendisk(struct gendisk *gp); 344extern void del_gendisk(struct gendisk *gp);
360extern void unlink_gendisk(struct gendisk *gp); 345extern void unlink_gendisk(struct gendisk *gp);
361extern struct gendisk *get_gendisk(dev_t dev, int *part); 346extern struct gendisk *get_gendisk(dev_t dev, int *partno);
347extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
362 348
363extern void set_device_ro(struct block_device *bdev, int flag); 349extern void set_device_ro(struct block_device *bdev, int flag);
364extern void set_disk_ro(struct gendisk *disk, int flag); 350extern void set_disk_ro(struct gendisk *disk, int flag);
365 351
352static inline int get_disk_ro(struct gendisk *disk)
353{
354 return disk->part0.policy;
355}
356
366/* drivers/char/random.c */ 357/* drivers/char/random.c */
367extern void add_disk_randomness(struct gendisk *disk); 358extern void add_disk_randomness(struct gendisk *disk);
368extern void rand_initialize_disk(struct gendisk *disk); 359extern void rand_initialize_disk(struct gendisk *disk);
369 360
370static inline sector_t get_start_sect(struct block_device *bdev) 361static inline sector_t get_start_sect(struct block_device *bdev)
371{ 362{
372 return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; 363 return bdev->bd_part->start_sect;
373} 364}
374static inline sector_t get_capacity(struct gendisk *disk) 365static inline sector_t get_capacity(struct gendisk *disk)
375{ 366{
376 return disk->capacity; 367 return disk->part0.nr_sects;
377} 368}
378static inline void set_capacity(struct gendisk *disk, sector_t size) 369static inline void set_capacity(struct gendisk *disk, sector_t size)
379{ 370{
380 disk->capacity = size; 371 disk->part0.nr_sects = size;
381} 372}
382 373
383#ifdef CONFIG_SOLARIS_X86_PARTITION 374#ifdef CONFIG_SOLARIS_X86_PARTITION
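The intended pattern brackets the per-cpu updates with the lock pair and resolves the partition under RCU. A hedged sketch of per-request accounting, along the lines of what blk-core does:

	int cpu = part_stat_lock();	/* rcu_read_lock() + get_cpu() */
	struct hd_struct *part = disk_map_sector_rcu(disk, sector);

	part_stat_inc(cpu, part, ios[rw]);
	part_stat_add(cpu, part, sectors[rw], nr_sectors);
	part_round_stats(cpu, part);
	part_stat_unlock();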
@@ -527,9 +518,12 @@ struct unixware_disklabel {
527#define ADDPART_FLAG_RAID 1 518#define ADDPART_FLAG_RAID 1
528#define ADDPART_FLAG_WHOLEDISK 2 519#define ADDPART_FLAG_WHOLEDISK 2
529 520
530extern dev_t blk_lookup_devt(const char *name, int part); 521extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt);
531extern char *disk_name (struct gendisk *hd, int part, char *buf); 522extern void blk_free_devt(dev_t devt);
523extern dev_t blk_lookup_devt(const char *name, int partno);
524extern char *disk_name (struct gendisk *hd, int partno, char *buf);
532 525
526extern int disk_expand_part_tbl(struct gendisk *disk, int target);
533extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); 527extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
534extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); 528extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int);
535extern void delete_partition(struct gendisk *, int); 529extern void delete_partition(struct gendisk *, int);
@@ -546,16 +540,23 @@ extern void blk_register_region(dev_t devt, unsigned long range,
546 void *data); 540 void *data);
547extern void blk_unregister_region(dev_t devt, unsigned long range); 541extern void blk_unregister_region(dev_t devt, unsigned long range);
548 542
549static inline struct block_device *bdget_disk(struct gendisk *disk, int index) 543extern ssize_t part_size_show(struct device *dev,
550{ 544 struct device_attribute *attr, char *buf);
551 return bdget(MKDEV(disk->major, disk->first_minor) + index); 545extern ssize_t part_stat_show(struct device *dev,
552} 546 struct device_attribute *attr, char *buf);
547#ifdef CONFIG_FAIL_MAKE_REQUEST
548extern ssize_t part_fail_show(struct device *dev,
549 struct device_attribute *attr, char *buf);
550extern ssize_t part_fail_store(struct device *dev,
551 struct device_attribute *attr,
552 const char *buf, size_t count);
553#endif /* CONFIG_FAIL_MAKE_REQUEST */
553 554
554#else /* CONFIG_BLOCK */ 555#else /* CONFIG_BLOCK */
555 556
556static inline void printk_all_partitions(void) { } 557static inline void printk_all_partitions(void) { }
557 558
558static inline dev_t blk_lookup_devt(const char *name, int part) 559static inline dev_t blk_lookup_devt(const char *name, int partno)
559{ 560{
560 dev_t devt = MKDEV(0, 0); 561 dev_t devt = MKDEV(0, 0);
561 return devt; 562 return devt;
diff --git a/include/linux/klist.h b/include/linux/klist.h
index 06c338ef7f1b..8ea98db223e5 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -38,7 +38,7 @@ extern void klist_init(struct klist *k, void (*get)(struct klist_node *),
38 void (*put)(struct klist_node *)); 38 void (*put)(struct klist_node *));
39 39
40struct klist_node { 40struct klist_node {
41 struct klist *n_klist; 41 void *n_klist; /* never access directly */
42 struct list_head n_node; 42 struct list_head n_node;
43 struct kref n_ref; 43 struct kref n_ref;
44 struct completion n_removed; 44 struct completion n_removed;
@@ -57,7 +57,6 @@ extern int klist_node_attached(struct klist_node *n);
57 57
58struct klist_iter { 58struct klist_iter {
59 struct klist *i_klist; 59 struct klist *i_klist;
60 struct list_head *i_head;
61 struct klist_node *i_cur; 60 struct klist_node *i_cur;
62}; 61};
63 62
diff --git a/include/linux/major.h b/include/linux/major.h
index 53d5fafd85c3..88249452b935 100644
--- a/include/linux/major.h
+++ b/include/linux/major.h
@@ -170,4 +170,6 @@
170 170
171#define VIOTAPE_MAJOR 230 171#define VIOTAPE_MAJOR 230
172 172
173#define BLOCK_EXT_MAJOR 259
174
173#endif 175#endif
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 310e61606415..8b4aa0523db7 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -41,6 +41,8 @@ struct mtd_blktrans_ops {
41 unsigned long block, char *buffer); 41 unsigned long block, char *buffer);
42 int (*writesect)(struct mtd_blktrans_dev *dev, 42 int (*writesect)(struct mtd_blktrans_dev *dev,
43 unsigned long block, char *buffer); 43 unsigned long block, char *buffer);
44 int (*discard)(struct mtd_blktrans_dev *dev,
45 unsigned long block, unsigned nr_blocks);
44 46
45 /* Block layer ioctls */ 47 /* Block layer ioctls */
46 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo); 48 int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo);
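An FTL-style translation layer can now drop mappings when the filesystem discards blocks. A hedged sketch of an ops table using the hook (the myftl_* names are placeholders):

static int myftl_discard(struct mtd_blktrans_dev *dev,
			 unsigned long block, unsigned nr_blocks)
{
	return myftl_invalidate_range(dev, block, nr_blocks);
}

static struct mtd_blktrans_ops myftl_tr = {
	.discard = myftl_discard,
	/* readsect/writesect etc. omitted */
};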
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index f9f6e793575c..855bf95963e7 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -75,7 +75,6 @@ struct scsi_cmnd {
75 75
76 int retries; 76 int retries;
77 int allowed; 77 int allowed;
78 int timeout_per_command;
79 78
80 unsigned char prot_op; 79 unsigned char prot_op;
81 unsigned char prot_type; 80 unsigned char prot_type;
@@ -86,7 +85,6 @@ struct scsi_cmnd {
86 /* These elements define the operation we are about to perform */ 85 /* These elements define the operation we are about to perform */
87 unsigned char *cmnd; 86 unsigned char *cmnd;
88 87
89 struct timer_list eh_timeout; /* Used to time out the command. */
90 88
91 /* These elements define the operation we ultimately want to perform */ 89 /* These elements define the operation we ultimately want to perform */
92 struct scsi_data_buffer sdb; 90 struct scsi_data_buffer sdb;
@@ -139,7 +137,6 @@ extern void scsi_put_command(struct scsi_cmnd *);
 extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
 			       struct device *);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
-extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd);
 
 extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count,
 				 size_t *offset, size_t *len);
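
Both removals follow from moving command timing into the block layer's generic request timeout handling introduced elsewhere in this series. A driver that used to tune timeout_per_command would instead size the timeout on its request queue; a hedged sketch, assuming the blk_queue_rq_timeout() helper this series adds:

	#include <linux/blkdev.h>
	#include <scsi/scsi_device.h>

	/* Hypothetical slave_configure: a per-queue timeout replaces the
	 * old per-command eh_timeout timer. */
	static int mydrv_slave_configure(struct scsi_device *sdev)
	{
		blk_queue_rq_timeout(sdev->request_queue, 30 * HZ);
		return 0;
	}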
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 44a55d1bf530..d123ca84e732 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -43,13 +43,6 @@ struct blk_queue_tags;
 #define DISABLE_CLUSTERING 0
 #define ENABLE_CLUSTERING  1
 
-enum scsi_eh_timer_return {
-	EH_NOT_HANDLED,
-	EH_HANDLED,
-	EH_RESET_TIMER,
-};
-
-
 struct scsi_host_template {
 	struct module *module;
 	const char *name;
@@ -347,7 +340,7 @@ struct scsi_host_template {
 	 *
 	 * Status: OPTIONAL
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Name of proc directory
diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h
index 490bd13a634c..0de32cd4e8a7 100644
--- a/include/scsi/scsi_transport.h
+++ b/include/scsi/scsi_transport.h
@@ -21,6 +21,7 @@
 #define SCSI_TRANSPORT_H
 
 #include <linux/transport_class.h>
+#include <linux/blkdev.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 
@@ -64,7 +65,7 @@ struct scsi_transport_template {
 	 *			begin counting again
 	 * EH_NOT_HANDLED	Begin normal error recovery
 	 */
-	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
 
 	/*
 	 * Used as callback for the completion of i_t_nexus request
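
Both templates now return the block layer's timer enum instead of SCSI's private scsi_eh_timer_return, which is why <linux/blkdev.h> is pulled in above. Assuming the blk_eh_timer_return values blkdev.h gains in this series (BLK_EH_NOT_HANDLED, BLK_EH_HANDLED, BLK_EH_RESET_TIMER), a hook could look like the sketch below; mydrv_cmd_still_in_flight() is a made-up driver-specific check:

	#include <linux/blkdev.h>
	#include <scsi/scsi_cmnd.h>

	static bool mydrv_cmd_still_in_flight(struct scsi_cmnd *cmd);	/* hypothetical */

	/* Grant slow-but-live commands another timer interval. */
	static enum blk_eh_timer_return mydrv_eh_timed_out(struct scsi_cmnd *cmd)
	{
		if (mydrv_cmd_still_in_flight(cmd))
			return BLK_EH_RESET_TIMER;	/* restart the block timer */
		return BLK_EH_NOT_HANDLED;		/* enter normal SCSI EH */
	}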
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3715feb8446d..d055b1914c3d 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -263,6 +263,10 @@ retry:
 		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
 
 		printk_all_partitions();
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+		       "explicit textual name for \"root=\" boot option.\n");
+#endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b504814e378..7d7a31d0ddeb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -624,6 +624,28 @@ config BACKTRACE_SELF_TEST
 
 	  Say N if you are unsure.
 
+config DEBUG_BLOCK_EXT_DEVT
+	bool "Force extended block device numbers and spread them"
+	depends on DEBUG_KERNEL
+	depends on BLOCK
+	default n
+	help
+	  Conventionally, block device numbers are allocated from a
+	  predetermined contiguous area. However, the extended block
+	  area may introduce non-contiguous block device numbers. This
+	  option forces most block device numbers to be allocated from
+	  the extended space and spreads them to discover kernel or
+	  userland code paths which assume predetermined contiguous
+	  device number allocation.
+
+	  Note that turning on this debug option shuffles all the
+	  device numbers for all IDE and SCSI devices including libata
+	  ones, so a root partition specified using a device number
+	  directly (via rdev or root=MAJ:MIN) won't work anymore.
+	  Textual device names (root=/dev/sdXn) will continue to work.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL
@@ -661,10 +683,21 @@ config FAIL_PAGE_ALLOC
 
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION
+	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability for disk IO.
 
+config FAIL_IO_TIMEOUT
+	bool "Fault-injection capability for faking disk interrupts"
+	depends on FAULT_INJECTION && BLOCK
+	help
+	  Provide fault-injection capability on end IO handling. This
+	  will make the block layer "forget" an interrupt as configured,
+	  thus exercising the error handling.
+
+	  Only works with drivers that use the generic timeout handling;
+	  for others it won't do anything.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
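
For reference, the device numbers this option shuffles live under the BLOCK_EXT_MAJOR (259) major added to include/linux/major.h above, which is why only textual root= names survive the shuffle. A small userspace sketch of what such a number decodes to; the minor value is an arbitrary example:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/sysmacros.h>	/* makedev(), major(), minor() */

	int main(void)
	{
		dev_t devt = makedev(259, 0);	/* 259 == BLOCK_EXT_MAJOR */

		printf("major=%u minor=%u\n", major(devt), minor(devt));
		return 0;
	}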
diff --git a/lib/klist.c b/lib/klist.c
index cca37f96faa2..bbdd3015c2c7 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -37,6 +37,37 @@
 #include <linux/klist.h>
 #include <linux/module.h>
 
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD		1LU
+#define KNODE_KLIST_MASK	~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+	return (struct klist *)
+		((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+	return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+	knode->n_klist = klist;
+	/* no knode deserves to start its life dead */
+	WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+	/* and no knode should die twice ever either, see we're very humane */
+	WARN_ON(knode_dead(knode));
+	*(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
 
 /**
  * klist_init - Initialize a klist structure.
@@ -79,7 +110,7 @@ static void klist_node_init(struct klist *k, struct klist_node *n)
 	INIT_LIST_HEAD(&n->n_node);
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
-	n->n_klist = k;
+	knode_set_klist(n, k);
 	if (k->get)
 		k->get(n);
 }
@@ -115,7 +146,7 @@ EXPORT_SYMBOL_GPL(klist_add_tail);
  */
 void klist_add_after(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -131,7 +162,7 @@ EXPORT_SYMBOL_GPL(klist_add_after);
  */
 void klist_add_before(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -144,9 +175,10 @@ static void klist_release(struct kref *kref)
 {
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
+	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
 	complete(&n->n_removed);
-	n->n_klist = NULL;
+	knode_set_klist(n, NULL);
 }
 
 static int klist_dec_and_del(struct klist_node *n)
@@ -154,22 +186,29 @@ static int klist_dec_and_del(struct klist_node *n)
 	return kref_put(&n->n_ref, klist_release);
 }
 
-/**
- * klist_del - Decrement the reference count of node and try to remove.
- * @n: node we're deleting.
- */
-void klist_del(struct klist_node *n)
+static void klist_put(struct klist_node *n, bool kill)
 {
-	struct klist *k = n->n_klist;
+	struct klist *k = knode_klist(n);
 	void (*put)(struct klist_node *) = k->put;
 
 	spin_lock(&k->k_lock);
+	if (kill)
+		knode_kill(n);
 	if (!klist_dec_and_del(n))
 		put = NULL;
 	spin_unlock(&k->k_lock);
 	if (put)
 		put(n);
 }
+
+/**
+ * klist_del - Decrement the reference count of node and try to remove.
+ * @n: node we're deleting.
+ */
+void klist_del(struct klist_node *n)
+{
+	klist_put(n, true);
+}
 EXPORT_SYMBOL_GPL(klist_del);
 
 /**
@@ -206,7 +245,6 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i,
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_head = &k->k_list;
 	i->i_cur = n;
 	if (n)
 		kref_get(&n->n_ref);
@@ -237,7 +275,7 @@ EXPORT_SYMBOL_GPL(klist_iter_init);
 void klist_iter_exit(struct klist_iter *i)
 {
 	if (i->i_cur) {
-		klist_del(i->i_cur);
+		klist_put(i->i_cur, false);
 		i->i_cur = NULL;
 	}
 }
@@ -258,27 +296,33 @@ static struct klist_node *to_klist_node(struct list_head *n)
  */
 struct klist_node *klist_next(struct klist_iter *i)
 {
-	struct list_head *next;
-	struct klist_node *lnode = i->i_cur;
-	struct klist_node *knode = NULL;
 	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *next;
 
 	spin_lock(&i->i_klist->k_lock);
+
-	if (lnode) {
-		next = lnode->n_node.next;
-		if (!klist_dec_and_del(lnode))
+	if (last) {
+		next = to_klist_node(last->n_node.next);
+		if (!klist_dec_and_del(last))
 			put = NULL;
 	} else
-		next = i->i_head->next;
+		next = to_klist_node(i->i_klist->k_list.next);
 
-	if (next != i->i_head) {
-		knode = to_klist_node(next);
-		kref_get(&knode->n_ref);
+	i->i_cur = NULL;
+	while (next != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(next))) {
+			kref_get(&next->n_ref);
+			i->i_cur = next;
+			break;
+		}
+		next = to_klist_node(next->n_node.next);
 	}
-	i->i_cur = knode;
+
 	spin_unlock(&i->i_klist->k_lock);
-	if (put && lnode)
-		put(lnode);
-	return knode;
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);
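
The rewrite above rests on one trick: struct klist is at least word-aligned, so the low bit of any pointer to it is always zero and can carry a "node is dead" flag. klist_del() marks the node while iterators may still hold references, and klist_next() silently skips dead nodes instead of returning them. A standalone userspace sketch of the tagging itself (the names are illustrative, not the kernel's):

	#include <assert.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct list { int unused; };		/* stand-in for struct klist */

	struct node {
		struct list *owner;		/* low bit doubles as "dead" */
	};

	#define DEAD_BIT	1UL
	#define OWNER_MASK	(~DEAD_BIT)

	static struct list *node_owner(struct node *n)
	{
		return (struct list *)((unsigned long)n->owner & OWNER_MASK);
	}

	static bool node_dead(struct node *n)
	{
		return (unsigned long)n->owner & DEAD_BIT;
	}

	static void node_kill(struct node *n)
	{
		*(unsigned long *)&n->owner |= DEAD_BIT;
	}

	int main(void)
	{
		static struct list l;		/* aligned object: low bit free */
		struct node n = { .owner = &l };

		node_kill(&n);
		assert(node_dead(&n));		/* the flag is set ... */
		assert(node_owner(&n) == &l);	/* ... and the pointer intact */
		printf("dead=%d\n", node_dead(&n));
		return 0;
	}

From a caller's point of view the iterator API is unchanged: klist_iter_init(), a klist_next() loop, then klist_iter_exit(). The only visible difference is that nodes killed mid-iteration drop out of the walk instead of being handed back.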
diff --git a/mm/bounce.c b/mm/bounce.c
index b6d2d0f1019b..06722c403058 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -267,7 +267,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig)
267 /* 267 /*
268 * Data-less bio, nothing to bounce 268 * Data-less bio, nothing to bounce
269 */ 269 */
270 if (bio_empty_barrier(*bio_orig)) 270 if (!bio_has_data(*bio_orig))
271 return; 271 return;
272 272
273 /* 273 /*
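
bio_has_data() is the broader test: it is true only for bios that actually carry pages, so the bounce code now skips every data-less bio, not just empty barriers. The helper added to include/linux/bio.h earlier in this series is roughly the following; treat this as a paraphrase, not the verbatim source:

	static inline int bio_has_data(struct bio *bio)
	{
		return bio && bio->bi_io_vec != NULL;
	}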