-rw-r--r--  Documentation/ABI/testing/sysfs-block  | 64
-rw-r--r--  block/blk-exec.c                       |  2
-rw-r--r--  block/blk-flush.c                      | 16
-rw-r--r--  block/blk-lib.c                        | 82
-rw-r--r--  block/blk-settings.c                   |  9
-rw-r--r--  block/blk-sysfs.c                      |  3
-rw-r--r--  block/blk.h                            | 21
-rw-r--r--  block/elevator.c                       |  7
-rw-r--r--  drivers/ata/libata-scsi.c              | 13
-rw-r--r--  drivers/block/paride/pcd.c             |  2
-rw-r--r--  drivers/cdrom/viocd.c                  |  4
-rw-r--r--  drivers/ide/ide-cd.c                   |  3
-rw-r--r--  drivers/scsi/sr.c                      |  2
-rw-r--r--  fs/block_dev.c                         | 17
-rw-r--r--  fs/partitions/check.c                  |  8
-rw-r--r--  include/linux/blkdev.h                 | 15
-rw-r--r--  include/linux/genhd.h                  |  2
17 files changed, 177 insertions(+), 93 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index 4873c759d535..c1eb41cb9876 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -142,3 +142,67 @@ Description:
 		with the previous I/O request are enabled. When set to 2,
 		all merge tries are disabled. The default value is 0 -
 		which enables all types of merge tries.
+
+What:		/sys/block/<disk>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		device is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/<partition>/discard_alignment
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space in units that are bigger than
+		the exported logical block size. The discard_alignment
+		parameter indicates how many bytes the beginning of the
+		partition is offset from the internal allocation unit's
+		natural alignment.
+
+What:		/sys/block/<disk>/queue/discard_granularity
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may
+		internally allocate space using units that are bigger
+		than the logical block size. The discard_granularity
+		parameter indicates the size of the internal allocation
+		unit in bytes if reported by the device. Otherwise the
+		discard_granularity will be set to match the device's
+		physical block size. A discard_granularity of 0 means
+		that the device does not support discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_max_bytes
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may have
+		internal limits on the number of bytes that can be
+		trimmed or unmapped in a single operation. Some storage
+		protocols also have inherent limits on the number of
+		blocks that can be described in a single command. The
+		discard_max_bytes parameter is set by the device driver
+		to the maximum number of bytes that can be discarded in
+		a single operation. Discard requests issued to the
+		device must not exceed this limit. A discard_max_bytes
+		value of 0 means that the device does not support
+		discard functionality.
+
+What:		/sys/block/<disk>/queue/discard_zeroes_data
+Date:		May 2011
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Devices that support discard functionality may return
+		stale or random data when a previously discarded block
+		is read back. This can cause problems if the filesystem
+		expects discarded blocks to be explicitly cleared. If a
+		device reports that it deterministically returns zeroes
+		when a discarded area is read the discard_zeroes_data
+		parameter will be set to one. Otherwise it will be 0 and
+		the result of reading a discarded area is undefined.
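
[Illustration, not part of the patch] The ABI entries above can be exercised from userspace by reading the attribute files. A minimal C sketch, assuming a disk named "sda" and treating a missing attribute as 0:

#include <stdio.h>

static unsigned long long read_ull(const char *path)
{
	unsigned long long v = 0;
	FILE *f = fopen(path, "r");

	if (!f)
		return 0;	/* treat a missing attribute as 0 */
	if (fscanf(f, "%llu", &v) != 1)
		v = 0;
	fclose(f);
	return v;
}

int main(void)
{
	unsigned long long granularity =
		read_ull("/sys/block/sda/queue/discard_granularity");
	unsigned long long max_bytes =
		read_ull("/sys/block/sda/queue/discard_max_bytes");
	unsigned long long zeroes =
		read_ull("/sys/block/sda/queue/discard_zeroes_data");

	/* per the ABI text above, 0 in either attribute means no discard */
	if (!granularity || !max_bytes) {
		printf("sda: discard not supported\n");
		return 0;
	}
	printf("sda: granularity=%llu max_bytes=%llu zeroes_data=%llu\n",
	       granularity, max_bytes, zeroes);
	return 0;
}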
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 81e31819a597..8a0e7ec056e7 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -56,7 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	spin_lock_irq(q->queue_lock);
 	__elv_add_request(q, rq, where);
 	__blk_run_queue(q);
-	/* the queue is stopped so it won't be plugged+unplugged */
+	/* the queue is stopped so it won't be run */
 	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
 	spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 6c9b5e189e62..bb21e4c36f70 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -212,13 +212,19 @@ static void flush_end_io(struct request *flush_rq, int error)
 	}
 
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue. Kick the queue in those cases. This function is called
-	 * from request completion path and calling directly into
-	 * request_fn may confuse the driver. Always use kblockd.
+	 * Kick the queue to avoid stall for two cases:
+	 * 1. Moving a request silently to empty queue_head may stall the
+	 * queue.
+	 * 2. When flush request is running in non-queueable queue, the
+	 * queue is hold. Restart the queue after flush request is finished
+	 * to avoid stall.
+	 * This function is called from request completion path and calling
+	 * directly into request_fn may confuse the driver. Always use
+	 * kblockd.
 	 */
-	if (queued)
+	if (queued || q->flush_queue_delayed)
 		blk_run_queue_async(q);
+	q->flush_queue_delayed = 0;
 }
 
 /**
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 25de73e4759b..78e627e2581d 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,17 +9,20 @@
 
 #include "blk.h"
 
-static void blkdev_discard_end_io(struct bio *bio, int err)
-{
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
+struct bio_batch {
+	atomic_t		done;
+	unsigned long		flags;
+	struct completion	*wait;
+};
 
-	if (bio->bi_private)
-		complete(bio->bi_private);
+static void bio_batch_end_io(struct bio *bio, int err)
+{
+	struct bio_batch *bb = bio->bi_private;
 
+	if (err && (err != -EOPNOTSUPP))
+		clear_bit(BIO_UPTODATE, &bb->flags);
+	if (atomic_dec_and_test(&bb->done))
+		complete(bb->wait);
 	bio_put(bio);
 }
 
@@ -41,6 +44,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
+	struct bio_batch bb;
 	struct bio *bio;
 	int ret = 0;
 
@@ -67,7 +71,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		type |= REQ_SECURE;
 	}
 
-	while (nr_sects && !ret) {
+	atomic_set(&bb.done, 1);
+	bb.flags = 1 << BIO_UPTODATE;
+	bb.wait = &wait;
+
+	while (nr_sects) {
 		bio = bio_alloc(gfp_mask, 1);
 		if (!bio) {
 			ret = -ENOMEM;
@@ -75,9 +83,9 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		}
 
 		bio->bi_sector = sector;
-		bio->bi_end_io = blkdev_discard_end_io;
+		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &wait;
+		bio->bi_private = &bb;
 
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
@@ -88,45 +96,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			nr_sects = 0;
 		}
 
-		bio_get(bio);
+		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+	}
 
+	/* Wait for bios in-flight */
+	if (!atomic_dec_and_test(&bb.done))
 		wait_for_completion(&wait);
 
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
+	if (!test_bit(BIO_UPTODATE, &bb.flags))
+		ret = -EIO;
 
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
-struct bio_batch
-{
-	atomic_t		done;
-	unsigned long		flags;
-	struct completion	*wait;
-};
-
-static void bio_batch_end_io(struct bio *bio, int err)
-{
-	struct bio_batch *bb = bio->bi_private;
-
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bb->flags);
-		else
-			clear_bit(BIO_UPTODATE, &bb->flags);
-	}
-	if (bb)
-		if (atomic_dec_and_test(&bb->done))
-			complete(bb->wait);
-	bio_put(bio);
-}
-
 /**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:	blockdev to issue
@@ -151,7 +135,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	bb.flags = 1 << BIO_UPTODATE;
 	bb.wait = &wait;
 
-submit:
 	ret = 0;
 	while (nr_sects != 0) {
 		bio = bio_alloc(gfp_mask,
@@ -168,9 +151,6 @@ submit:
 
 	while (nr_sects != 0) {
 		sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
-		if (sz == 0)
-			/* bio has maximum size possible */
-			break;
 		ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
 		nr_sects -= ret >> 9;
 		sector += ret >> 9;
@@ -190,16 +170,6 @@ submit:
 		/* One of bios in the batch was completed with error.*/
 		ret = -EIO;
 
-	if (ret)
-		goto out;
-
-	if (test_bit(BIO_EOPNOTSUPP, &bb.flags)) {
-		ret = -EOPNOTSUPP;
-		goto out;
-	}
-	if (nr_sects != 0)
-		goto submit;
-out:
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_zeroout);
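
[Illustration, not part of the patch] The rewrite above makes blkdev_issue_discard() reuse the bio_batch pattern that blkdev_issue_zeroout() already used: bb.done starts at 1 so the batch cannot complete while bios are still being submitted; each submitted bio takes a reference, each completion drops one, and the submitter's final decrement either finds itself last or waits. A userspace analogue of that counting scheme, sketched with pthreads in place of kernel completions (all names here are invented for the example):

#include <pthread.h>
#include <stdio.h>

struct bio_batch {
	int done;		/* outstanding references, biased by 1 */
	int uptodate;
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static void batch_put(struct bio_batch *bb, int err)
{
	pthread_mutex_lock(&bb->lock);
	if (err)
		bb->uptodate = 0;
	if (--bb->done == 0)
		pthread_cond_signal(&bb->cond);
	pthread_mutex_unlock(&bb->lock);
}

static void *fake_io(void *arg)
{
	batch_put(arg, 0);	/* each "bio" completes exactly once */
	return NULL;
}

int main(void)
{
	struct bio_batch bb = {
		.done = 1,	/* bias held by the submitter */
		.uptodate = 1,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t tid[4];
	int i;

	for (i = 0; i < 4; i++) {
		pthread_mutex_lock(&bb.lock);
		bb.done++;	/* one reference per submitted "bio" */
		pthread_mutex_unlock(&bb.lock);
		pthread_create(&tid[i], NULL, fake_io, &bb);
	}

	/* drop the submitter's bias, then wait out in-flight completions */
	pthread_mutex_lock(&bb.lock);
	bb.done--;
	while (bb.done)
		pthread_cond_wait(&bb.cond, &bb.lock);
	pthread_mutex_unlock(&bb.lock);

	for (i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	printf("batch complete, uptodate=%d\n", bb.uptodate);
	return 0;
}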
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa769293597..fa1eb0449a05 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -120,7 +120,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->discard_granularity = 0;
 	lim->discard_alignment = 0;
 	lim->discard_misaligned = 0;
-	lim->discard_zeroes_data = -1;
+	lim->discard_zeroes_data = 1;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -166,6 +166,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 
 	blk_set_default_limits(&q->limits);
 	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
+	q->limits.discard_zeroes_data = 0;
 
 	/*
 	 * by default assume old behaviour and bounce for any highmem page
@@ -790,6 +791,12 @@ void blk_queue_flush(struct request_queue *q, unsigned int flush)
 }
 EXPORT_SYMBOL_GPL(blk_queue_flush);
 
+void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
+{
+	q->flush_not_queueable = !queueable;
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index bd236313f35d..d935bd859c87 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -152,7 +152,8 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
 
 static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
 {
-	return queue_var_show(q->limits.max_discard_sectors << 9, page);
+	return sprintf(page, "%llu\n",
+		       (unsigned long long)q->limits.max_discard_sectors << 9);
 }
 
 static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
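
[Illustration, not part of the patch] The likely motivation for the change above: max_discard_sectors is an unsigned int, so max_discard_sectors << 9 can wrap before queue_var_show() widens it, whereas the new code casts to unsigned long long before shifting. A standalone demonstration of the wrap (the sample value is an assumption):

#include <stdio.h>

int main(void)
{
	/* 8388608 sectors = 4 GiB; value chosen to force the wrap */
	unsigned int max_discard_sectors = 8388608;

	/* 32-bit arithmetic: 8388608 << 9 == 2^32, which wraps to 0 */
	printf("narrow shift: %u\n", max_discard_sectors << 9);

	/* widen first, as the new show function does */
	printf("wide shift:   %llu\n",
	       (unsigned long long)max_discard_sectors << 9);
	return 0;
}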
diff --git a/block/blk.h b/block/blk.h
index 61263463e38e..1f798b5a6f19 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -61,7 +61,26 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 		rq = list_entry_rq(q->queue_head.next);
 		return rq;
 	}
-
+	/*
+	 * Flush request is running and flush request isn't queueable
+	 * in the drive, we can hold the queue till flush request is
+	 * finished. Even we don't do this, driver can't dispatch next
+	 * requests and will requeue them. And this can improve
+	 * throughput too. For example, we have request flush1, write1,
+	 * flush 2. flush1 is dispatched, then queue is hold, write1
+	 * isn't inserted to queue. After flush1 is finished, flush2
+	 * will be dispatched. Since disk cache is already clean,
+	 * flush2 will be finished very soon, so looks like flush2 is
+	 * folded to flush1.
+	 * Since the queue is hold, a flag is set to indicate the queue
+	 * should be restarted later. Please see flush_end_io() for
+	 * details.
+	 */
+	if (q->flush_pending_idx != q->flush_running_idx &&
+	    !queue_flush_queueable(q)) {
+		q->flush_queue_delayed = 1;
+		return NULL;
+	}
 	if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
 		return NULL;
 	}
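
[Illustration, not part of the patch] A toy model of the handshake introduced here together with the flush_end_io() change earlier: dispatch holds the queue while a flush is in flight on a non-queueable drive and records that in flush_queue_delayed; flush completion then checks the flag and kicks the queue. Everything with a toy_ prefix is invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

struct toy_queue {
	bool flush_in_flight;	/* flush_pending_idx != flush_running_idx */
	bool flush_not_queueable;
	bool flush_queue_delayed;
};

/* dispatch side, mirroring the __elv_next_request() hunk above */
static const char *toy_next_request(struct toy_queue *q)
{
	if (q->flush_in_flight && q->flush_not_queueable) {
		q->flush_queue_delayed = true;	/* remember to restart */
		return NULL;			/* hold the queue */
	}
	return "write1";
}

/* completion side, mirroring the flush_end_io() hunk earlier */
static void toy_flush_end_io(struct toy_queue *q)
{
	q->flush_in_flight = false;
	if (q->flush_queue_delayed)
		printf("restarting held queue (blk_run_queue_async)\n");
	q->flush_queue_delayed = false;
}

int main(void)
{
	struct toy_queue q = { .flush_in_flight = true,
			       .flush_not_queueable = true };

	if (!toy_next_request(&q))
		printf("queue held while flush1 is running\n");
	toy_flush_end_io(&q);
	printf("dispatched: %s\n", toy_next_request(&q));
	return 0;
}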
diff --git a/block/elevator.c b/block/elevator.c
index 45ca1e34f582..2a0b653c90fd 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -155,13 +155,8 @@ static struct elevator_type *elevator_get(const char *name)
 
 	e = elevator_find(name);
 	if (!e) {
-		char elv[ELV_NAME_MAX + strlen("-iosched")];
-
 		spin_unlock(&elv_list_lock);
-
-		snprintf(elv, sizeof(elv), "%s-iosched", name);
-
-		request_module("%s", elv);
+		request_module("%s-iosched", name);
 		spin_lock(&elv_list_lock);
 		e = elevator_find(name);
 	}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e2f57e9e12f0..dad2c952e0d2 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1089,21 +1089,21 @@ static int atapi_drain_needed(struct request *rq)
 static int ata_scsi_dev_config(struct scsi_device *sdev,
 			       struct ata_device *dev)
 {
+	struct request_queue *q = sdev->request_queue;
+
 	if (!ata_id_has_unload(dev->id))
 		dev->flags |= ATA_DFLAG_NO_UNLOAD;
 
 	/* configure max sectors */
-	blk_queue_max_hw_sectors(sdev->request_queue, dev->max_sectors);
+	blk_queue_max_hw_sectors(q, dev->max_sectors);
 
 	if (dev->class == ATA_DEV_ATAPI) {
-		struct request_queue *q = sdev->request_queue;
 		void *buf;
 
 		sdev->sector_size = ATA_SECT_SIZE;
 
 		/* set DMA padding */
-		blk_queue_update_dma_pad(sdev->request_queue,
-					 ATA_DMA_PAD_SZ - 1);
+		blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
 
 		/* configure draining */
 		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
@@ -1131,8 +1131,7 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
1131 "sector_size=%u > PAGE_SIZE, PIO may malfunction\n", 1131 "sector_size=%u > PAGE_SIZE, PIO may malfunction\n",
1132 sdev->sector_size); 1132 sdev->sector_size);
1133 1133
1134 blk_queue_update_dma_alignment(sdev->request_queue, 1134 blk_queue_update_dma_alignment(q, sdev->sector_size - 1);
1135 sdev->sector_size - 1);
1136 1135
1137 if (dev->flags & ATA_DFLAG_AN) 1136 if (dev->flags & ATA_DFLAG_AN)
1138 set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events); 1137 set_bit(SDEV_EVT_MEDIA_CHANGE, sdev->supported_events);
@@ -1145,6 +1144,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, depth);
 	}
 
+	blk_queue_flush_queueable(q, false);
+
 	dev->sdev = sdev;
 	return 0;
 }
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 8690e31d9932..a0aabd904a51 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -320,6 +320,8 @@ static void pcd_init_units(void)
 		disk->first_minor = unit;
 		strcpy(disk->disk_name, cd->name);	/* umm... */
 		disk->fops = &pcd_bdops;
+		disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+		disk->events = DISK_EVENT_MEDIA_CHANGE;
 	}
 }
 
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index e427fbe45999..ae15a4ddaa9b 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -625,7 +625,9 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	blk_queue_max_hw_sectors(q, 4096 / 512);
 	gendisk->queue = q;
 	gendisk->fops = &viocd_fops;
-	gendisk->flags = GENHD_FL_CD|GENHD_FL_REMOVABLE;
+	gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
+			 GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	gendisk->events = DISK_EVENT_MEDIA_CHANGE;
 	set_capacity(gendisk, 0);
 	gendisk->private_data = d;
 	d->viocd_disk = gendisk;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index a5ec5a7cb381..6e5123b1d341 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1781,7 +1781,8 @@ static int ide_cd_probe(ide_drive_t *drive)
 
 	ide_cd_read_toc(drive, &sense);
 	g->fops = &idecd_ops;
-	g->flags |= GENHD_FL_REMOVABLE;
+	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
+	g->events = DISK_EVENT_MEDIA_CHANGE;
 	add_disk(g);
 	return 0;
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 95019c747cc1..4778e2707168 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -636,7 +636,7 @@ static int sr_probe(struct device *dev)
 	disk->first_minor = minor;
 	sprintf(disk->disk_name, "sr%d", minor);
 	disk->fops = &sr_bdops;
-	disk->flags = GENHD_FL_CD;
+	disk->flags = GENHD_FL_CD | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
 	disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
 
 	blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 257b00e98428..d7c2e0fddc6f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1237,6 +1237,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1263,15 +1265,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims.  Any write
-		 * holder makes the write_holder state stick until all
-		 * are released.  This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested.  Any
+		 * write holder makes the write_holder state stick until
+		 * all are released.  This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index d545e97d99c3..8ed4d3433199 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%u\n", p->discard_alignment);
+	struct gendisk *disk = dev_to_disk(dev);
+
+	return sprintf(buf, "%u\n",
+			queue_limit_discard_alignment(&disk->queue->limits,
+							p->start_sect));
 }
 
 ssize_t part_stat_show(struct device *dev,
@@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	p->start_sect = start;
 	p->alignment_offset =
 		queue_limit_alignment_offset(&disk->queue->limits, start);
-	p->discard_alignment =
-		queue_limit_discard_alignment(&disk->queue->limits, start);
 	p->nr_sects = len;
 	p->partno = partno;
 	p->policy = get_disk_ro(disk);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2ad95fa1d130..ae9091a68480 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -257,7 +257,7 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
-	signed char		discard_zeroes_data;
+	unsigned char		discard_zeroes_data;
 };
 
 struct request_queue
@@ -364,6 +364,8 @@ struct request_queue
 	 * for flush operations
 	 */
 	unsigned int		flush_flags;
+	unsigned int		flush_not_queueable:1;
+	unsigned int		flush_queue_delayed:1;
 	unsigned int		flush_pending_idx:1;
 	unsigned int		flush_running_idx:1;
 	unsigned long		flush_pending_since;
@@ -843,6 +845,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
+extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
@@ -1066,13 +1069,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 {
 	unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
 
+	if (!lim->max_discard_sectors)
+		return 0;
+
 	return (lim->discard_granularity + lim->discard_alignment - alignment)
 		& (lim->discard_granularity - 1);
 }
 
 static inline unsigned int queue_discard_zeroes_data(struct request_queue *q)
 {
-	if (q->limits.discard_zeroes_data == 1)
+	if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1)
 		return 1;
 
 	return 0;
@@ -1111,6 +1117,11 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
+static inline bool queue_flush_queueable(struct request_queue *q)
+{
+	return !q->flush_not_queueable;
+}
+
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
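
[Illustration, not part of the patch] A standalone check of the discard-alignment arithmetic above, including the new max_discard_sectors == 0 guard. All limit values are made up, and discard_granularity is assumed to be a power of two, which the mask arithmetic requires:

#include <stdio.h>

struct limits {
	unsigned int max_discard_sectors;
	unsigned int discard_granularity;	/* power of two, in bytes */
	unsigned int discard_alignment;		/* bytes */
};

static unsigned int discard_alignment(const struct limits *lim,
				      unsigned long long sector)
{
	unsigned int alignment =
		(sector << 9) & (lim->discard_granularity - 1);

	if (!lim->max_discard_sectors)
		return 0;	/* no discard support: report 0, not garbage */

	return (lim->discard_granularity + lim->discard_alignment - alignment)
		& (lim->discard_granularity - 1);
}

int main(void)
{
	struct limits lim = {
		.max_discard_sectors = 65535,
		.discard_granularity = 4096,
		.discard_alignment = 512,
	};

	/* (63 << 9) % 4096 = 3584; (4096 + 512 - 3584) % 4096 = 1024 */
	printf("%u\n", discard_alignment(&lim, 63));	/* prints 1024 */

	lim.max_discard_sectors = 0;
	printf("%u\n", discard_alignment(&lim, 63));	/* prints 0 */
	return 0;
}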
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index d764a426e9fd..b78956b3c2e7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -100,7 +100,6 @@ struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	sector_t alignment_offset;
-	unsigned int discard_alignment;
 	struct device __dev;
 	struct kobject *holder_dir;
 	int policy, partno;
@@ -127,6 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_EXT_DEVT			64 /* allow extended devt */
 #define GENHD_FL_NATIVE_CAPACITY		128
+#define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE	256
 
 enum {
 	DISK_EVENT_MEDIA_CHANGE	= 1 << 0, /* media changed */