aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c18
-rw-r--r--block/blk-core.c19
-rw-r--r--block/blk-integrity.c381
-rw-r--r--block/blk-map.c6
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk-settings.c24
-rw-r--r--block/blk.h8
-rw-r--r--block/blktrace.c45
-rw-r--r--block/bsg.c38
-rw-r--r--block/cfq-iosched.c83
-rw-r--r--block/cmd-filter.c334
-rw-r--r--block/elevator.c8
-rw-r--r--block/genhd.c12
-rw-r--r--block/scsi_ioctl.c121
16 files changed, 959 insertions, 157 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
81 81
82 If unsure, say N. 82 If unsure, say N.
83 83
84config BLK_DEV_INTEGRITY
85 bool "Block layer data integrity support"
86 ---help---
87 Some storage devices allow extra information to be
88 stored/retrieved to help protect the data. The block layer
89 data integrity option provides hooks which can be used by
90 filesystems to ensure better data integrity.
91
92 Say yes here if you have a storage device that provides the
93 T10/SCSI Data Integrity Field or the T13/ATA External Path
94 Protection. If in doubt, say N.
95
84endif # BLOCK 96endif # BLOCK
85 97
86config BLOCK_COMPAT 98config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..208000b0750d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,7 +4,8 @@
4 4
5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o 7 blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
8 cmd-filter.o
8 9
9obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
10obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
14 15
15obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 16obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 17obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
18obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 743f33a01a07..9735acb5b4f5 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -151,6 +151,7 @@ enum arq_state {
151 151
152static DEFINE_PER_CPU(unsigned long, ioc_count); 152static DEFINE_PER_CPU(unsigned long, ioc_count);
153static struct completion *ioc_gone; 153static struct completion *ioc_gone;
154static DEFINE_SPINLOCK(ioc_gone_lock);
154 155
155static void as_move_to_dispatch(struct as_data *ad, struct request *rq); 156static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
156static void as_antic_stop(struct as_data *ad); 157static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
164{ 165{
165 kfree(aic); 166 kfree(aic);
166 elv_ioc_count_dec(ioc_count); 167 elv_ioc_count_dec(ioc_count);
167 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 168 if (ioc_gone) {
168 complete(ioc_gone); 169 /*
170 * AS scheduler is exiting, grab exit lock and check
171 * the pending io context count. If it hits zero,
172 * complete ioc_gone and set it back to NULL.
173 */
174 spin_lock(&ioc_gone_lock);
175 if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
176 complete(ioc_gone);
177 ioc_gone = NULL;
178 }
179 spin_unlock(&ioc_gone_lock);
180 }
169} 181}
170 182
171static void as_trim(struct io_context *ioc) 183static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
1493 /* ioc_gone's update must be visible before reading ioc_count */ 1505 /* ioc_gone's update must be visible before reading ioc_count */
1494 smp_wmb(); 1506 smp_wmb();
1495 if (elv_ioc_count_read(ioc_count)) 1507 if (elv_ioc_count_read(ioc_count))
1496 wait_for_completion(ioc_gone); 1508 wait_for_completion(&all_gone);
1497 synchronize_rcu(); 1509 synchronize_rcu();
1498} 1510}
1499 1511
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..dbc7f42b5d2b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
143 143
144 bio->bi_size -= nbytes; 144 bio->bi_size -= nbytes;
145 bio->bi_sector += (nbytes >> 9); 145 bio->bi_sector += (nbytes >> 9);
146
147 if (bio_integrity(bio))
148 bio_integrity_advance(bio, nbytes);
149
146 if (bio->bi_size == 0) 150 if (bio->bi_size == 0)
147 bio_endio(bio, error); 151 bio_endio(bio, error);
148 } else { 152 } else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
201 if (blk_queue_stopped(q)) 205 if (blk_queue_stopped(q))
202 return; 206 return;
203 207
204 if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { 208 if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
205 __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
206 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); 209 mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
207 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); 210 blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
208 } 211 }
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
217{ 220{
218 WARN_ON(!irqs_disabled()); 221 WARN_ON(!irqs_disabled());
219 222
220 if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) 223 if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
221 return 0; 224 return 0;
222 225
223 queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
224 del_timer(&q->unplug_timer); 226 del_timer(&q->unplug_timer);
225 return 1; 227 return 1;
226} 228}
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
324 * one level of recursion is ok and is much faster than kicking 326 * one level of recursion is ok and is much faster than kicking
325 * the unplug handling 327 * the unplug handling
326 */ 328 */
327 if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 329 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
328 queue_flag_set(QUEUE_FLAG_REENTER, q);
329 q->request_fn(q); 330 q->request_fn(q);
330 queue_flag_clear(QUEUE_FLAG_REENTER, q); 331 queue_flag_clear(QUEUE_FLAG_REENTER, q);
331 } else { 332 } else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
390 * handling reinvoke the handler shortly if we already got there. 391 * handling reinvoke the handler shortly if we already got there.
391 */ 392 */
392 if (!elv_queue_empty(q)) { 393 if (!elv_queue_empty(q)) {
393 if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { 394 if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
394 queue_flag_set(QUEUE_FLAG_REENTER, q);
395 q->request_fn(q); 395 q->request_fn(q);
396 queue_flag_clear(QUEUE_FLAG_REENTER, q); 396 queue_flag_clear(QUEUE_FLAG_REENTER, q);
397 } else { 397 } else {
@@ -1381,6 +1381,9 @@ end_io:
1381 */ 1381 */
1382 blk_partition_remap(bio); 1382 blk_partition_remap(bio);
1383 1383
1384 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1385 goto end_io;
1386
1384 if (old_sector != -1) 1387 if (old_sector != -1)
1385 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1388 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
1386 old_sector); 1389 old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..3f1a8478cc38
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,381 @@
1/*
2 * blk-integrity.c - Block layer data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/scatterlist.h>
27
28#include "blk.h"
29
30static struct kmem_cache *integrity_cachep;
31
32/**
33 * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
34 * @rq: request with integrity metadata attached
35 *
36 * Description: Returns the number of elements required in a
37 * scatterlist corresponding to the integrity metadata in a request.
38 */
39int blk_rq_count_integrity_sg(struct request *rq)
40{
41 struct bio_vec *iv, *ivprv;
42 struct req_iterator iter;
43 unsigned int segments;
44
45 ivprv = NULL;
46 segments = 0;
47
48 rq_for_each_integrity_segment(iv, rq, iter) {
49
50 if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
51 segments++;
52
53 ivprv = iv;
54 }
55
56 return segments;
57}
58EXPORT_SYMBOL(blk_rq_count_integrity_sg);
59
60/**
61 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
62 * @rq: request with integrity metadata attached
63 * @sglist: target scatterlist
64 *
65 * Description: Map the integrity vectors in request into a
66 * scatterlist. The scatterlist must be big enough to hold all
67 * elements. I.e. sized using blk_rq_count_integrity_sg().
68 */
69int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
70{
71 struct bio_vec *iv, *ivprv;
72 struct req_iterator iter;
73 struct scatterlist *sg;
74 unsigned int segments;
75
76 ivprv = NULL;
77 sg = NULL;
78 segments = 0;
79
80 rq_for_each_integrity_segment(iv, rq, iter) {
81
82 if (ivprv) {
83 if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
84 goto new_segment;
85
86 sg->length += iv->bv_len;
87 } else {
88new_segment:
89 if (!sg)
90 sg = sglist;
91 else {
92 sg->page_link &= ~0x02;
93 sg = sg_next(sg);
94 }
95
96 sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
97 segments++;
98 }
99
100 ivprv = iv;
101 }
102
103 if (sg)
104 sg_mark_end(sg);
105
106 return segments;
107}
108EXPORT_SYMBOL(blk_rq_map_integrity_sg);
109
110/**
111 * blk_integrity_compare - Compare integrity profile of two block devices
112 * @b1: Device to compare
113 * @b2: Device to compare
114 *
115 * Description: Meta-devices like DM and MD need to verify that all
116 * sub-devices use the same integrity format before advertising to
117 * upper layers that they can send/receive integrity metadata. This
118 * function can be used to check whether two block devices have
119 * compatible integrity formats.
120 */
121int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
122{
123 struct blk_integrity *b1 = bd1->bd_disk->integrity;
124 struct blk_integrity *b2 = bd2->bd_disk->integrity;
125
126 BUG_ON(bd1->bd_disk == NULL);
127 BUG_ON(bd2->bd_disk == NULL);
128
129 if (!b1 || !b2)
130 return 0;
131
132 if (b1->sector_size != b2->sector_size) {
133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
134 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
135 b1->sector_size, b2->sector_size);
136 return -1;
137 }
138
139 if (b1->tuple_size != b2->tuple_size) {
140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
141 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
142 b1->tuple_size, b2->tuple_size);
143 return -1;
144 }
145
146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
148 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
149 b1->tag_size, b2->tag_size);
150 return -1;
151 }
152
153 if (strcmp(b1->name, b2->name)) {
154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
155 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
156 b1->name, b2->name);
157 return -1;
158 }
159
160 return 0;
161}
162EXPORT_SYMBOL(blk_integrity_compare);
163
164struct integrity_sysfs_entry {
165 struct attribute attr;
166 ssize_t (*show)(struct blk_integrity *, char *);
167 ssize_t (*store)(struct blk_integrity *, const char *, size_t);
168};
169
170static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
171 char *page)
172{
173 struct blk_integrity *bi =
174 container_of(kobj, struct blk_integrity, kobj);
175 struct integrity_sysfs_entry *entry =
176 container_of(attr, struct integrity_sysfs_entry, attr);
177
178 return entry->show(bi, page);
179}
180
181static ssize_t integrity_attr_store(struct kobject *kobj,
182 struct attribute *attr, const char *page,
183 size_t count)
184{
185 struct blk_integrity *bi =
186 container_of(kobj, struct blk_integrity, kobj);
187 struct integrity_sysfs_entry *entry =
188 container_of(attr, struct integrity_sysfs_entry, attr);
189 ssize_t ret = 0;
190
191 if (entry->store)
192 ret = entry->store(bi, page, count);
193
194 return ret;
195}
196
197static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
198{
199 if (bi != NULL && bi->name != NULL)
200 return sprintf(page, "%s\n", bi->name);
201 else
202 return sprintf(page, "none\n");
203}
204
205static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
206{
207 if (bi != NULL)
208 return sprintf(page, "%u\n", bi->tag_size);
209 else
210 return sprintf(page, "0\n");
211}
212
213static ssize_t integrity_read_store(struct blk_integrity *bi,
214 const char *page, size_t count)
215{
216 char *p = (char *) page;
217 unsigned long val = simple_strtoul(p, &p, 10);
218
219 if (val)
220 bi->flags |= INTEGRITY_FLAG_READ;
221 else
222 bi->flags &= ~INTEGRITY_FLAG_READ;
223
224 return count;
225}
226
227static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
228{
229 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
230}
231
232static ssize_t integrity_write_store(struct blk_integrity *bi,
233 const char *page, size_t count)
234{
235 char *p = (char *) page;
236 unsigned long val = simple_strtoul(p, &p, 10);
237
238 if (val)
239 bi->flags |= INTEGRITY_FLAG_WRITE;
240 else
241 bi->flags &= ~INTEGRITY_FLAG_WRITE;
242
243 return count;
244}
245
246static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
247{
248 return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
249}
250
251static struct integrity_sysfs_entry integrity_format_entry = {
252 .attr = { .name = "format", .mode = S_IRUGO },
253 .show = integrity_format_show,
254};
255
256static struct integrity_sysfs_entry integrity_tag_size_entry = {
257 .attr = { .name = "tag_size", .mode = S_IRUGO },
258 .show = integrity_tag_size_show,
259};
260
261static struct integrity_sysfs_entry integrity_read_entry = {
262 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
263 .show = integrity_read_show,
264 .store = integrity_read_store,
265};
266
267static struct integrity_sysfs_entry integrity_write_entry = {
268 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
269 .show = integrity_write_show,
270 .store = integrity_write_store,
271};
272
273static struct attribute *integrity_attrs[] = {
274 &integrity_format_entry.attr,
275 &integrity_tag_size_entry.attr,
276 &integrity_read_entry.attr,
277 &integrity_write_entry.attr,
278 NULL,
279};
280
281static struct sysfs_ops integrity_ops = {
282 .show = &integrity_attr_show,
283 .store = &integrity_attr_store,
284};
285
286static int __init blk_dev_integrity_init(void)
287{
288 integrity_cachep = kmem_cache_create("blkdev_integrity",
289 sizeof(struct blk_integrity),
290 0, SLAB_PANIC, NULL);
291 return 0;
292}
293subsys_initcall(blk_dev_integrity_init);
294
295static void blk_integrity_release(struct kobject *kobj)
296{
297 struct blk_integrity *bi =
298 container_of(kobj, struct blk_integrity, kobj);
299
300 kmem_cache_free(integrity_cachep, bi);
301}
302
303static struct kobj_type integrity_ktype = {
304 .default_attrs = integrity_attrs,
305 .sysfs_ops = &integrity_ops,
306 .release = blk_integrity_release,
307};
308
309/**
310 * blk_integrity_register - Register a gendisk as being integrity-capable
311 * @disk: struct gendisk pointer to make integrity-aware
312 * @template: integrity profile
313 *
314 * Description: When a device needs to advertise itself as being able
315 * to send/receive integrity metadata it must use this function to
316 * register the capability with the block layer. The template is a
317 * blk_integrity struct with values appropriate for the underlying
318 * hardware. See Documentation/block/data-integrity.txt.
319 */
320int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
321{
322 struct blk_integrity *bi;
323
324 BUG_ON(disk == NULL);
325 BUG_ON(template == NULL);
326
327 if (disk->integrity == NULL) {
328 bi = kmem_cache_alloc(integrity_cachep,
329 GFP_KERNEL | __GFP_ZERO);
330 if (!bi)
331 return -1;
332
333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
334 &disk->dev.kobj, "%s", "integrity")) {
335 kmem_cache_free(integrity_cachep, bi);
336 return -1;
337 }
338
339 kobject_uevent(&bi->kobj, KOBJ_ADD);
340
341 bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
342 bi->sector_size = disk->queue->hardsect_size;
343 disk->integrity = bi;
344 } else
345 bi = disk->integrity;
346
347 /* Use the provided profile as template */
348 bi->name = template->name;
349 bi->generate_fn = template->generate_fn;
350 bi->verify_fn = template->verify_fn;
351 bi->tuple_size = template->tuple_size;
352 bi->set_tag_fn = template->set_tag_fn;
353 bi->get_tag_fn = template->get_tag_fn;
354 bi->tag_size = template->tag_size;
355
356 return 0;
357}
358EXPORT_SYMBOL(blk_integrity_register);
359
360/**
361 * blk_integrity_unregister - Remove block integrity profile
362 * @disk: disk whose integrity profile to deallocate
363 *
364 * Description: This function frees all memory used by the block
365 * integrity profile. To be called at device teardown.
366 */
367void blk_integrity_unregister(struct gendisk *disk)
368{
369 struct blk_integrity *bi;
370
371 if (!disk || !disk->integrity)
372 return;
373
374 bi = disk->integrity;
375
376 kobject_uevent(&bi->kobj, KOBJ_REMOVE);
377 kobject_del(&bi->kobj);
378 kobject_put(&disk->dev.kobj);
379 kmem_cache_free(integrity_cachep, bi);
380}
381EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-map.c b/block/blk-map.c
index 0b1af5a3537c..ddd96fb11a7d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
210 if (!bio_flagged(bio, BIO_USER_MAPPED)) 210 if (!bio_flagged(bio, BIO_USER_MAPPED))
211 rq->cmd_flags |= REQ_COPY_USER; 211 rq->cmd_flags |= REQ_COPY_USER;
212 212
213 blk_queue_bounce(q, &bio);
213 bio_get(bio); 214 bio_get(bio);
214 blk_rq_bio_prep(q, rq, bio); 215 blk_rq_bio_prep(q, rq, bio);
215 rq->buffer = rq->data = NULL; 216 rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
268 int reading = rq_data_dir(rq) == READ; 269 int reading = rq_data_dir(rq) == READ;
269 int do_copy = 0; 270 int do_copy = 0;
270 struct bio *bio; 271 struct bio *bio;
272 unsigned long stack_mask = ~(THREAD_SIZE - 1);
271 273
272 if (len > (q->max_hw_sectors << 9)) 274 if (len > (q->max_hw_sectors << 9))
273 return -EINVAL; 275 return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
278 alignment = queue_dma_alignment(q) | q->dma_pad_mask; 280 alignment = queue_dma_alignment(q) | q->dma_pad_mask;
279 do_copy = ((kaddr & alignment) || (len & alignment)); 281 do_copy = ((kaddr & alignment) || (len & alignment));
280 282
283 if (!((kaddr & stack_mask) ^
284 ((unsigned long)current->stack & stack_mask)))
285 do_copy = 1;
286
281 if (do_copy) 287 if (do_copy)
282 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 288 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
283 else 289 else
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
441 || next->special) 441 || next->special)
442 return 0; 442 return 0;
443 443
444 if (blk_integrity_rq(req) != blk_integrity_rq(next))
445 return 0;
446
444 /* 447 /*
445 * If we are allowed to merge, then append bio list 448 * If we are allowed to merge, then append bio list
446 * from next to rq and release next. merge_requests_fn 449 * from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8dd86418f35d..dfc77012843f 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
302 * @q: the request queue for the device 302 * @q: the request queue for the device
303 * @mask: pad mask 303 * @mask: pad mask
304 * 304 *
305 * Set pad mask. Direct IO requests are padded to the mask specified. 305 * Set dma pad mask.
306 * 306 *
307 * Appending pad buffer to a request modifies ->data_len such that it 307 * Appending pad buffer to a request modifies the last entry of a
308 * includes the pad buffer. The original requested data length can be 308 * scatter list such that it includes the pad buffer.
309 * obtained using blk_rq_raw_data_len().
310 **/ 309 **/
311void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) 310void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
312{ 311{
@@ -315,6 +314,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
315EXPORT_SYMBOL(blk_queue_dma_pad); 314EXPORT_SYMBOL(blk_queue_dma_pad);
316 315
317/** 316/**
317 * blk_queue_update_dma_pad - update pad mask
318 * @q: the request queue for the device
319 * @mask: pad mask
320 *
321 * Update dma pad mask.
322 *
323 * Appending pad buffer to a request modifies the last entry of a
324 * scatter list such that it includes the pad buffer.
325 **/
326void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
327{
328 if (mask > q->dma_pad_mask)
329 q->dma_pad_mask = mask;
330}
331EXPORT_SYMBOL(blk_queue_update_dma_pad);
332
333/**
318 * blk_queue_dma_drain - Set up a drain buffer for excess dma. 334 * blk_queue_dma_drain - Set up a drain buffer for excess dma.
319 * @q: the request queue for the device 335 * @q: the request queue for the device
320 * @dma_drain_needed: fn which returns non-zero if drain is necessary 336 * @dma_drain_needed: fn which returns non-zero if drain is necessary
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
51 return q->nr_congestion_off; 51 return q->nr_congestion_off;
52} 52}
53 53
54#if defined(CONFIG_BLK_DEV_INTEGRITY)
55
56#define rq_for_each_integrity_segment(bvl, _rq, _iter) \
57 __rq_for_each_bio(_iter.bio, _rq) \
58 bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
59
60#endif /* BLK_DEV_INTEGRITY */
61
54#endif 62#endif
diff --git a/block/blktrace.c b/block/blktrace.c
index 8d3a27780260..eb9651ccb241 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -244,6 +244,7 @@ err:
244static void blk_trace_cleanup(struct blk_trace *bt) 244static void blk_trace_cleanup(struct blk_trace *bt)
245{ 245{
246 relay_close(bt->rchan); 246 relay_close(bt->rchan);
247 debugfs_remove(bt->msg_file);
247 debugfs_remove(bt->dropped_file); 248 debugfs_remove(bt->dropped_file);
248 blk_remove_tree(bt->dir); 249 blk_remove_tree(bt->dir);
249 free_percpu(bt->sequence); 250 free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
291 .read = blk_dropped_read, 292 .read = blk_dropped_read,
292}; 293};
293 294
295static int blk_msg_open(struct inode *inode, struct file *filp)
296{
297 filp->private_data = inode->i_private;
298
299 return 0;
300}
301
302static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
303 size_t count, loff_t *ppos)
304{
305 char *msg;
306 struct blk_trace *bt;
307
308 if (count > BLK_TN_MAX_MSG)
309 return -EINVAL;
310
311 msg = kmalloc(count, GFP_KERNEL);
312 if (msg == NULL)
313 return -ENOMEM;
314
315 if (copy_from_user(msg, buffer, count)) {
316 kfree(msg);
317 return -EFAULT;
318 }
319
320 bt = filp->private_data;
321 __trace_note_message(bt, "%s", msg);
322 kfree(msg);
323
324 return count;
325}
326
327static const struct file_operations blk_msg_fops = {
328 .owner = THIS_MODULE,
329 .open = blk_msg_open,
330 .write = blk_msg_write,
331};
332
294/* 333/*
295 * Keep track of how many times we encountered a full subbuffer, to aid 334 * Keep track of how many times we encountered a full subbuffer, to aid
296 * the user space app in telling how many lost events there were. 335 * the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
380 if (!bt->dropped_file) 419 if (!bt->dropped_file)
381 goto err; 420 goto err;
382 421
422 bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
423 if (!bt->msg_file)
424 goto err;
425
383 bt->rchan = relay_open("trace", dir, buts->buf_size, 426 bt->rchan = relay_open("trace", dir, buts->buf_size,
384 buts->buf_nr, &blk_relay_callbacks, bt); 427 buts->buf_nr, &blk_relay_callbacks, bt);
385 if (!bt->rchan) 428 if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
409 if (dir) 452 if (dir)
410 blk_remove_tree(dir); 453 blk_remove_tree(dir);
411 if (bt) { 454 if (bt) {
455 if (bt->msg_file)
456 debugfs_remove(bt->msg_file);
412 if (bt->dropped_file) 457 if (bt->dropped_file)
413 debugfs_remove(bt->dropped_file); 458 debugfs_remove(bt->dropped_file);
414 free_percpu(bt->sequence); 459 free_percpu(bt->sequence);
diff --git a/block/bsg.c b/block/bsg.c
index 7c59ffaedfe0..0b3b282f0384 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -45,11 +45,12 @@ struct bsg_device {
45 char name[BUS_ID_SIZE]; 45 char name[BUS_ID_SIZE];
46 int max_queue; 46 int max_queue;
47 unsigned long flags; 47 unsigned long flags;
48 struct blk_scsi_cmd_filter *cmd_filter;
49 mode_t *f_mode;
48}; 50};
49 51
50enum { 52enum {
51 BSG_F_BLOCK = 1, 53 BSG_F_BLOCK = 1,
52 BSG_F_WRITE_PERM = 2,
53}; 54};
54 55
55#define BSG_DEFAULT_CMDS 64 56#define BSG_DEFAULT_CMDS 64
@@ -173,7 +174,7 @@ unlock:
173} 174}
174 175
175static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, 176static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
176 struct sg_io_v4 *hdr, int has_write_perm) 177 struct sg_io_v4 *hdr, struct bsg_device *bd)
177{ 178{
178 if (hdr->request_len > BLK_MAX_CDB) { 179 if (hdr->request_len > BLK_MAX_CDB) {
179 rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL); 180 rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -186,7 +187,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
186 return -EFAULT; 187 return -EFAULT;
187 188
188 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { 189 if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
189 if (blk_verify_command(rq->cmd, has_write_perm)) 190 if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
191 bd->f_mode))
190 return -EPERM; 192 return -EPERM;
191 } else if (!capable(CAP_SYS_RAWIO)) 193 } else if (!capable(CAP_SYS_RAWIO))
192 return -EPERM; 194 return -EPERM;
@@ -264,8 +266,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
264 rq = blk_get_request(q, rw, GFP_KERNEL); 266 rq = blk_get_request(q, rw, GFP_KERNEL);
265 if (!rq) 267 if (!rq)
266 return ERR_PTR(-ENOMEM); 268 return ERR_PTR(-ENOMEM);
267 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM, 269 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
268 &bd->flags));
269 if (ret) 270 if (ret)
270 goto out; 271 goto out;
271 272
@@ -567,12 +568,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
567 set_bit(BSG_F_BLOCK, &bd->flags); 568 set_bit(BSG_F_BLOCK, &bd->flags);
568} 569}
569 570
570static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file) 571static void bsg_set_cmd_filter(struct bsg_device *bd,
572 struct file *file)
571{ 573{
572 if (file->f_mode & FMODE_WRITE) 574 struct inode *inode;
573 set_bit(BSG_F_WRITE_PERM, &bd->flags); 575 struct gendisk *disk;
574 else 576
575 clear_bit(BSG_F_WRITE_PERM, &bd->flags); 577 if (!file)
578 return;
579
580 inode = file->f_dentry->d_inode;
581 if (!inode)
582 return;
583
584 disk = inode->i_bdev->bd_disk;
585
586 bd->cmd_filter = &disk->cmd_filter;
587 bd->f_mode = &file->f_mode;
576} 588}
577 589
578/* 590/*
@@ -596,6 +608,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
596 dprintk("%s: read %Zd bytes\n", bd->name, count); 608 dprintk("%s: read %Zd bytes\n", bd->name, count);
597 609
598 bsg_set_block(bd, file); 610 bsg_set_block(bd, file);
611 bsg_set_cmd_filter(bd, file);
612
599 bytes_read = 0; 613 bytes_read = 0;
600 ret = __bsg_read(buf, count, bd, NULL, &bytes_read); 614 ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
601 *ppos = bytes_read; 615 *ppos = bytes_read;
@@ -669,7 +683,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
669 dprintk("%s: write %Zd bytes\n", bd->name, count); 683 dprintk("%s: write %Zd bytes\n", bd->name, count);
670 684
671 bsg_set_block(bd, file); 685 bsg_set_block(bd, file);
672 bsg_set_write_perm(bd, file); 686 bsg_set_cmd_filter(bd, file);
673 687
674 bytes_written = 0; 688 bytes_written = 0;
675 ret = __bsg_write(bd, buf, count, &bytes_written); 689 ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -773,7 +787,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
773 } 787 }
774 788
775 bd->queue = rq; 789 bd->queue = rq;
790
776 bsg_set_block(bd, file); 791 bsg_set_block(bd, file);
792 bsg_set_cmd_filter(bd, file);
777 793
778 atomic_set(&bd->ref_count, 1); 794 atomic_set(&bd->ref_count, 1);
779 mutex_lock(&bsg_mutex); 795 mutex_lock(&bsg_mutex);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d01b411c72f0..1e2aff812ee2 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -11,6 +11,7 @@
11#include <linux/elevator.h> 11#include <linux/elevator.h>
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/ioprio.h> 13#include <linux/ioprio.h>
14#include <linux/blktrace_api.h>
14 15
15/* 16/*
16 * tunables 17 * tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
41 42
42#define RQ_CIC(rq) \ 43#define RQ_CIC(rq) \
43 ((struct cfq_io_context *) (rq)->elevator_private) 44 ((struct cfq_io_context *) (rq)->elevator_private)
44#define RQ_CFQQ(rq) ((rq)->elevator_private2) 45#define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2)
45 46
46static struct kmem_cache *cfq_pool; 47static struct kmem_cache *cfq_pool;
47static struct kmem_cache *cfq_ioc_pool; 48static struct kmem_cache *cfq_ioc_pool;
48 49
49static DEFINE_PER_CPU(unsigned long, ioc_count); 50static DEFINE_PER_CPU(unsigned long, ioc_count);
50static struct completion *ioc_gone; 51static struct completion *ioc_gone;
52static DEFINE_SPINLOCK(ioc_gone_lock);
51 53
52#define CFQ_PRIO_LISTS IOPRIO_BE_NR 54#define CFQ_PRIO_LISTS IOPRIO_BE_NR
53#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) 55#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
155 unsigned short ioprio, org_ioprio; 157 unsigned short ioprio, org_ioprio;
156 unsigned short ioprio_class, org_ioprio_class; 158 unsigned short ioprio_class, org_ioprio_class;
157 159
160 pid_t pid;
158}; 161};
159 162
160enum cfqq_state_flags { 163enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
198CFQ_CFQQ_FNS(sync); 201CFQ_CFQQ_FNS(sync);
199#undef CFQ_CFQQ_FNS 202#undef CFQ_CFQQ_FNS
200 203
204#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
205 blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
206#define cfq_log(cfqd, fmt, args...) \
207 blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
208
201static void cfq_dispatch_insert(struct request_queue *, struct request *); 209static void cfq_dispatch_insert(struct request_queue *, struct request *);
202static struct cfq_queue *cfq_get_queue(struct cfq_data *, int, 210static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
203 struct io_context *, gfp_t); 211 struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
234 */ 242 */
235static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) 243static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
236{ 244{
237 if (cfqd->busy_queues) 245 if (cfqd->busy_queues) {
246 cfq_log(cfqd, "schedule dispatch");
238 kblockd_schedule_work(&cfqd->unplug_work); 247 kblockd_schedule_work(&cfqd->unplug_work);
248 }
239} 249}
240 250
241static int cfq_queue_empty(struct request_queue *q) 251static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
270cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) 280cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
271{ 281{
272 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; 282 cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
283 cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
273} 284}
274 285
275/* 286/*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
539 */ 550 */
540static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 551static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
541{ 552{
553 cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
542 BUG_ON(cfq_cfqq_on_rr(cfqq)); 554 BUG_ON(cfq_cfqq_on_rr(cfqq));
543 cfq_mark_cfqq_on_rr(cfqq); 555 cfq_mark_cfqq_on_rr(cfqq);
544 cfqd->busy_queues++; 556 cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
552 */ 564 */
553static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) 565static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
554{ 566{
567 cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
555 BUG_ON(!cfq_cfqq_on_rr(cfqq)); 568 BUG_ON(!cfq_cfqq_on_rr(cfqq));
556 cfq_clear_cfqq_on_rr(cfqq); 569 cfq_clear_cfqq_on_rr(cfqq);
557 570
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
638 struct cfq_data *cfqd = q->elevator->elevator_data; 651 struct cfq_data *cfqd = q->elevator->elevator_data;
639 652
640 cfqd->rq_in_driver++; 653 cfqd->rq_in_driver++;
654 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
655 cfqd->rq_in_driver);
641 656
642 /* 657 /*
643 * If the depth is larger 1, it really could be queueing. But lets 658 * If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
657 672
658 WARN_ON(!cfqd->rq_in_driver); 673 WARN_ON(!cfqd->rq_in_driver);
659 cfqd->rq_in_driver--; 674 cfqd->rq_in_driver--;
675 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
676 cfqd->rq_in_driver);
660} 677}
661 678
662static void cfq_remove_request(struct request *rq) 679static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
746 struct cfq_queue *cfqq) 763 struct cfq_queue *cfqq)
747{ 764{
748 if (cfqq) { 765 if (cfqq) {
766 cfq_log_cfqq(cfqd, cfqq, "set_active");
749 cfqq->slice_end = 0; 767 cfqq->slice_end = 0;
750 cfq_clear_cfqq_must_alloc_slice(cfqq); 768 cfq_clear_cfqq_must_alloc_slice(cfqq);
751 cfq_clear_cfqq_fifo_expire(cfqq); 769 cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
763__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, 781__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
764 int timed_out) 782 int timed_out)
765{ 783{
784 cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
785
766 if (cfq_cfqq_wait_request(cfqq)) 786 if (cfq_cfqq_wait_request(cfqq))
767 del_timer(&cfqd->idle_slice_timer); 787 del_timer(&cfqd->idle_slice_timer);
768 788
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
772 /* 792 /*
773 * store what was left of this slice, if the queue idled/timed out 793 * store what was left of this slice, if the queue idled/timed out
774 */ 794 */
775 if (timed_out && !cfq_cfqq_slice_new(cfqq)) 795 if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
776 cfqq->slice_resid = cfqq->slice_end - jiffies; 796 cfqq->slice_resid = cfqq->slice_end - jiffies;
797 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
798 }
777 799
778 cfq_resort_rr_list(cfqd, cfqq); 800 cfq_resort_rr_list(cfqd, cfqq);
779 801
@@ -866,6 +888,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
866 return; 888 return;
867 889
868 /* 890 /*
891 * still requests with the driver, don't idle
892 */
893 if (cfqd->rq_in_driver)
894 return;
895
896 /*
869 * task has exited, don't wait 897 * task has exited, don't wait
870 */ 898 */
871 cic = cfqd->active_cic; 899 cic = cfqd->active_cic;
@@ -892,6 +920,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
892 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); 920 sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
893 921
894 mod_timer(&cfqd->idle_slice_timer, jiffies + sl); 922 mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
923 cfq_log(cfqd, "arm_idle: %lu", sl);
895} 924}
896 925
897/* 926/*
@@ -902,6 +931,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
902 struct cfq_data *cfqd = q->elevator->elevator_data; 931 struct cfq_data *cfqd = q->elevator->elevator_data;
903 struct cfq_queue *cfqq = RQ_CFQQ(rq); 932 struct cfq_queue *cfqq = RQ_CFQQ(rq);
904 933
934 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
935
905 cfq_remove_request(rq); 936 cfq_remove_request(rq);
906 cfqq->dispatched++; 937 cfqq->dispatched++;
907 elv_dispatch_sort(q, rq); 938 elv_dispatch_sort(q, rq);
@@ -931,8 +962,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
931 rq = rq_entry_fifo(cfqq->fifo.next); 962 rq = rq_entry_fifo(cfqq->fifo.next);
932 963
933 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) 964 if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
934 return NULL; 965 rq = NULL;
935 966
967 cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
936 return rq; 968 return rq;
937} 969}
938 970
@@ -1072,6 +1104,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
1072 1104
1073 BUG_ON(cfqd->busy_queues); 1105 BUG_ON(cfqd->busy_queues);
1074 1106
1107 cfq_log(cfqd, "forced_dispatch=%d\n", dispatched);
1075 return dispatched; 1108 return dispatched;
1076} 1109}
1077 1110
@@ -1112,6 +1145,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1112 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); 1145 dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
1113 } 1146 }
1114 1147
1148 cfq_log(cfqd, "dispatched=%d", dispatched);
1115 return dispatched; 1149 return dispatched;
1116} 1150}
1117 1151
@@ -1130,6 +1164,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
1130 if (!atomic_dec_and_test(&cfqq->ref)) 1164 if (!atomic_dec_and_test(&cfqq->ref))
1131 return; 1165 return;
1132 1166
1167 cfq_log_cfqq(cfqd, cfqq, "put_queue");
1133 BUG_ON(rb_first(&cfqq->sort_list)); 1168 BUG_ON(rb_first(&cfqq->sort_list));
1134 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); 1169 BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
1135 BUG_ON(cfq_cfqq_on_rr(cfqq)); 1170 BUG_ON(cfq_cfqq_on_rr(cfqq));
@@ -1177,8 +1212,19 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
1177 kmem_cache_free(cfq_ioc_pool, cic); 1212 kmem_cache_free(cfq_ioc_pool, cic);
1178 elv_ioc_count_dec(ioc_count); 1213 elv_ioc_count_dec(ioc_count);
1179 1214
1180 if (ioc_gone && !elv_ioc_count_read(ioc_count)) 1215 if (ioc_gone) {
1181 complete(ioc_gone); 1216 /*
1217 * CFQ scheduler is exiting, grab exit lock and check
1218 * the pending io context count. If it hits zero,
1219 * complete ioc_gone and set it back to NULL
1220 */
1221 spin_lock(&ioc_gone_lock);
1222 if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
1223 complete(ioc_gone);
1224 ioc_gone = NULL;
1225 }
1226 spin_unlock(&ioc_gone_lock);
1227 }
1182} 1228}
1183 1229
1184static void cfq_cic_free(struct cfq_io_context *cic) 1230static void cfq_cic_free(struct cfq_io_context *cic)
@@ -1427,6 +1473,8 @@ retry:
1427 cfq_mark_cfqq_idle_window(cfqq); 1473 cfq_mark_cfqq_idle_window(cfqq);
1428 cfq_mark_cfqq_sync(cfqq); 1474 cfq_mark_cfqq_sync(cfqq);
1429 } 1475 }
1476 cfqq->pid = current->pid;
1477 cfq_log_cfqq(cfqd, cfqq, "alloced");
1430 } 1478 }
1431 1479
1432 if (new_cfqq) 1480 if (new_cfqq)
@@ -1675,7 +1723,7 @@ static void
1675cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, 1723cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1676 struct cfq_io_context *cic) 1724 struct cfq_io_context *cic)
1677{ 1725{
1678 int enable_idle; 1726 int old_idle, enable_idle;
1679 1727
1680 /* 1728 /*
1681 * Don't idle for async or idle io prio class 1729 * Don't idle for async or idle io prio class
@@ -1683,7 +1731,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1683 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq)) 1731 if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
1684 return; 1732 return;
1685 1733
1686 enable_idle = cfq_cfqq_idle_window(cfqq); 1734 enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
1687 1735
1688 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || 1736 if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
1689 (cfqd->hw_tag && CIC_SEEKY(cic))) 1737 (cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1695,10 +1743,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
1695 enable_idle = 1; 1743 enable_idle = 1;
1696 } 1744 }
1697 1745
1698 if (enable_idle) 1746 if (old_idle != enable_idle) {
1699 cfq_mark_cfqq_idle_window(cfqq); 1747 cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
1700 else 1748 if (enable_idle)
1701 cfq_clear_cfqq_idle_window(cfqq); 1749 cfq_mark_cfqq_idle_window(cfqq);
1750 else
1751 cfq_clear_cfqq_idle_window(cfqq);
1752 }
1702} 1753}
1703 1754
1704/* 1755/*
@@ -1757,6 +1808,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
1757 */ 1808 */
1758static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) 1809static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
1759{ 1810{
1811 cfq_log_cfqq(cfqd, cfqq, "preempt");
1760 cfq_slice_expired(cfqd, 1); 1812 cfq_slice_expired(cfqd, 1);
1761 1813
1762 /* 1814 /*
@@ -1818,6 +1870,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
1818 struct cfq_data *cfqd = q->elevator->elevator_data; 1870 struct cfq_data *cfqd = q->elevator->elevator_data;
1819 struct cfq_queue *cfqq = RQ_CFQQ(rq); 1871 struct cfq_queue *cfqq = RQ_CFQQ(rq);
1820 1872
1873 cfq_log_cfqq(cfqd, cfqq, "insert_request");
1821 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc); 1874 cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
1822 1875
1823 cfq_add_rq_rb(rq); 1876 cfq_add_rq_rb(rq);
@@ -1835,6 +1888,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
1835 unsigned long now; 1888 unsigned long now;
1836 1889
1837 now = jiffies; 1890 now = jiffies;
1891 cfq_log_cfqq(cfqd, cfqq, "complete");
1838 1892
1839 WARN_ON(!cfqd->rq_in_driver); 1893 WARN_ON(!cfqd->rq_in_driver);
1840 WARN_ON(!cfqq->dispatched); 1894 WARN_ON(!cfqq->dispatched);
@@ -2004,6 +2058,7 @@ queue_fail:
2004 2058
2005 cfq_schedule_dispatch(cfqd); 2059 cfq_schedule_dispatch(cfqd);
2006 spin_unlock_irqrestore(q->queue_lock, flags); 2060 spin_unlock_irqrestore(q->queue_lock, flags);
2061 cfq_log(cfqd, "set_request fail");
2007 return 1; 2062 return 1;
2008} 2063}
2009 2064
@@ -2029,6 +2084,8 @@ static void cfq_idle_slice_timer(unsigned long data)
2029 unsigned long flags; 2084 unsigned long flags;
2030 int timed_out = 1; 2085 int timed_out = 1;
2031 2086
2087 cfq_log(cfqd, "idle timer fired");
2088
2032 spin_lock_irqsave(cfqd->queue->queue_lock, flags); 2089 spin_lock_irqsave(cfqd->queue->queue_lock, flags);
2033 2090
2034 cfqq = cfqd->active_queue; 2091 cfqq = cfqd->active_queue;
@@ -2317,7 +2374,7 @@ static void __exit cfq_exit(void)
2317 * pending RCU callbacks 2374 * pending RCU callbacks
2318 */ 2375 */
2319 if (elv_ioc_count_read(ioc_count)) 2376 if (elv_ioc_count_read(ioc_count))
2320 wait_for_completion(ioc_gone); 2377 wait_for_completion(&all_gone);
2321 cfq_slab_kill(); 2378 cfq_slab_kill();
2322} 2379}
2323 2380
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644
index 000000000000..eec4404fd357
--- /dev/null
+++ b/block/cmd-filter.c
@@ -0,0 +1,334 @@
1/*
2 * Copyright 2004 Peter M. Jones <pjones@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20#include <linux/list.h>
21#include <linux/genhd.h>
22#include <linux/spinlock.h>
23#include <linux/parser.h>
24#include <linux/capability.h>
25#include <linux/bitops.h>
26
27#include <scsi/scsi.h>
28#include <linux/cdrom.h>
29
30int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
31 unsigned char *cmd, mode_t *f_mode)
32{
33 /* root can do any command. */
34 if (capable(CAP_SYS_RAWIO))
35 return 0;
36
37 /* if there's no filter set, assume we're filtering everything out */
38 if (!filter)
39 return -EPERM;
40
41 /* Anybody who can open the device can do a read-safe command */
42 if (test_bit(cmd[0], filter->read_ok))
43 return 0;
44
45 /* Write-safe commands require a writable open */
46 if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
47 return 0;
48
49 return -EPERM;
50}
51EXPORT_SYMBOL(blk_cmd_filter_verify_command);
52
53int blk_verify_command(struct file *file, unsigned char *cmd)
54{
55 struct gendisk *disk;
56 struct inode *inode;
57
58 if (!file)
59 return -EINVAL;
60
61 inode = file->f_dentry->d_inode;
62 if (!inode)
63 return -EINVAL;
64
65 disk = inode->i_bdev->bd_disk;
66
67 return blk_cmd_filter_verify_command(&disk->cmd_filter,
68 cmd, &file->f_mode);
69}
70EXPORT_SYMBOL(blk_verify_command);
71
72/* and now, the sysfs stuff */
73static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
74 int rw)
75{
76 char *npage = page;
77 unsigned long *okbits;
78 int i;
79
80 if (rw == READ)
81 okbits = filter->read_ok;
82 else
83 okbits = filter->write_ok;
84
85 for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
86 if (test_bit(i, okbits)) {
87 sprintf(npage, "%02x", i);
88 npage += 2;
89 if (i < BLK_SCSI_MAX_CMDS - 1)
90 sprintf(npage++, " ");
91 }
92 }
93
94 if (npage != page)
95 npage += sprintf(npage, "\n");
96
97 return npage - page;
98}
99
100static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
101{
102 return rcf_cmds_show(filter, page, READ);
103}
104
105static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
106 char *page)
107{
108 return rcf_cmds_show(filter, page, WRITE);
109}
110
111static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
112 const char *page, size_t count, int rw)
113{
114 ssize_t ret = 0;
115 unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
116 int cmd, status, len;
117 substring_t ss;
118
119 memset(&okbits, 0, sizeof(okbits));
120
121 for (len = strlen(page); len > 0; len -= 3) {
122 if (len < 2)
123 break;
124 ss.from = (char *) page + ret;
125 ss.to = (char *) page + ret + 2;
126 ret += 3;
127 status = match_hex(&ss, &cmd);
128 /* either of these cases means invalid input, so do nothing. */
129 if (status || cmd >= BLK_SCSI_MAX_CMDS)
130 return -EINVAL;
131
132 __set_bit(cmd, okbits);
133 }
134
135 if (rw == READ)
136 target_okbits = filter->read_ok;
137 else
138 target_okbits = filter->write_ok;
139
140 memmove(target_okbits, okbits, sizeof(okbits));
141 return count;
142}
143
144static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
145 const char *page, size_t count)
146{
147 return rcf_cmds_store(filter, page, count, READ);
148}
149
150static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
151 const char *page, size_t count)
152{
153 return rcf_cmds_store(filter, page, count, WRITE);
154}
155
156struct rcf_sysfs_entry {
157 struct attribute attr;
158 ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
159 ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
160};
161
162static struct rcf_sysfs_entry rcf_readcmds_entry = {
163 .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
164 .show = rcf_readcmds_show,
165 .store = rcf_readcmds_store,
166};
167
168static struct rcf_sysfs_entry rcf_writecmds_entry = {
169 .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
170 .show = rcf_writecmds_show,
171 .store = rcf_writecmds_store,
172};
173
174static struct attribute *default_attrs[] = {
175 &rcf_readcmds_entry.attr,
176 &rcf_writecmds_entry.attr,
177 NULL,
178};
179
180#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
181
182static ssize_t
183rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
184{
185 struct rcf_sysfs_entry *entry = to_rcf(attr);
186 struct blk_scsi_cmd_filter *filter;
187
188 filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
189 if (entry->show)
190 return entry->show(filter, page);
191
192 return 0;
193}
194
195static ssize_t
196rcf_attr_store(struct kobject *kobj, struct attribute *attr,
197 const char *page, size_t length)
198{
199 struct rcf_sysfs_entry *entry = to_rcf(attr);
200 struct blk_scsi_cmd_filter *filter;
201
202 if (!capable(CAP_SYS_RAWIO))
203 return -EPERM;
204
205 if (!entry->store)
206 return -EINVAL;
207
208 filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
209 return entry->store(filter, page, length);
210}
211
212static struct sysfs_ops rcf_sysfs_ops = {
213 .show = rcf_attr_show,
214 .store = rcf_attr_store,
215};
216
217static struct kobj_type rcf_ktype = {
218 .sysfs_ops = &rcf_sysfs_ops,
219 .default_attrs = default_attrs,
220};
221
222#ifndef MAINTENANCE_IN_CMD
223#define MAINTENANCE_IN_CMD 0xa3
224#endif
225
226static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
227{
228 /* Basic read-only commands */
229 __set_bit(TEST_UNIT_READY, filter->read_ok);
230 __set_bit(REQUEST_SENSE, filter->read_ok);
231 __set_bit(READ_6, filter->read_ok);
232 __set_bit(READ_10, filter->read_ok);
233 __set_bit(READ_12, filter->read_ok);
234 __set_bit(READ_16, filter->read_ok);
235 __set_bit(READ_BUFFER, filter->read_ok);
236 __set_bit(READ_DEFECT_DATA, filter->read_ok);
237 __set_bit(READ_CAPACITY, filter->read_ok);
238 __set_bit(READ_LONG, filter->read_ok);
239 __set_bit(INQUIRY, filter->read_ok);
240 __set_bit(MODE_SENSE, filter->read_ok);
241 __set_bit(MODE_SENSE_10, filter->read_ok);
242 __set_bit(LOG_SENSE, filter->read_ok);
243 __set_bit(START_STOP, filter->read_ok);
244 __set_bit(GPCMD_VERIFY_10, filter->read_ok);
245 __set_bit(VERIFY_16, filter->read_ok);
246 __set_bit(REPORT_LUNS, filter->read_ok);
247 __set_bit(SERVICE_ACTION_IN, filter->read_ok);
248 __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
249 __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
250 __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
251
252 /* Audio CD commands */
253 __set_bit(GPCMD_PLAY_CD, filter->read_ok);
254 __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
255 __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
256 __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
257 __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
258
259 /* CD/DVD data reading */
260 __set_bit(GPCMD_READ_CD, filter->read_ok);
261 __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
262 __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
263 __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
264 __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
265 __set_bit(GPCMD_READ_HEADER, filter->read_ok);
266 __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
267 __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
268 __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
269 __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
270 __set_bit(GPCMD_SCAN, filter->read_ok);
271 __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
272 __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
273 __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
274 __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
275 __set_bit(GPCMD_SEEK, filter->read_ok);
276 __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
277
278 /* Basic writing commands */
279 __set_bit(WRITE_6, filter->write_ok);
280 __set_bit(WRITE_10, filter->write_ok);
281 __set_bit(WRITE_VERIFY, filter->write_ok);
282 __set_bit(WRITE_12, filter->write_ok);
283 __set_bit(WRITE_VERIFY_12, filter->write_ok);
284 __set_bit(WRITE_16, filter->write_ok);
285 __set_bit(WRITE_LONG, filter->write_ok);
286 __set_bit(WRITE_LONG_2, filter->write_ok);
287 __set_bit(ERASE, filter->write_ok);
288 __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
289 __set_bit(MODE_SELECT, filter->write_ok);
290 __set_bit(LOG_SELECT, filter->write_ok);
291 __set_bit(GPCMD_BLANK, filter->write_ok);
292 __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
293 __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
294 __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
295 __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
296 __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
297 __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
298 __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
299 __set_bit(GPCMD_SEND_KEY, filter->write_ok);
300 __set_bit(GPCMD_SEND_OPC, filter->write_ok);
301 __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
302 __set_bit(GPCMD_SET_SPEED, filter->write_ok);
303 __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
304 __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
305 __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
306}
307
308int blk_register_filter(struct gendisk *disk)
309{
310 int ret;
311 struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
312 struct kobject *parent = kobject_get(disk->holder_dir->parent);
313
314 if (!parent)
315 return -ENODEV;
316
317 ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
318 "%s", "cmd_filter");
319
320 if (ret < 0)
321 return ret;
322
323 rcf_set_defaults(filter);
324 return 0;
325}
326
327void blk_unregister_filter(struct gendisk *disk)
328{
329 struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
330
331 kobject_put(&filter->kobj);
332 kobject_put(disk->holder_dir->parent);
333}
334
diff --git a/block/elevator.c b/block/elevator.c
index 902dd1344d56..ed6f8f32d27e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) 86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
87 return 0; 87 return 0;
88 88
89 /*
90 * only merge integrity protected bio into ditto rq
91 */
92 if (bio_integrity(bio) != blk_integrity_rq(rq))
93 return 0;
94
89 if (!elv_iosched_allow_merge(rq, bio)) 95 if (!elv_iosched_allow_merge(rq, bio))
90 return 0; 96 return 0;
91 97
@@ -144,7 +150,7 @@ static struct elevator_type *elevator_get(const char *name)
144 else 150 else
145 sprintf(elv, "%s-iosched", name); 151 sprintf(elv, "%s-iosched", name);
146 152
147 request_module(elv); 153 request_module("%s", elv);
148 spin_lock(&elv_list_lock); 154 spin_lock(&elv_list_lock);
149 e = elevator_find(name); 155 e = elevator_find(name);
150 } 156 }
diff --git a/block/genhd.c b/block/genhd.c
index b922d4801c87..9074f384b097 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
189 disk->minors, NULL, exact_match, exact_lock, disk); 189 disk->minors, NULL, exact_match, exact_lock, disk);
190 register_disk(disk); 190 register_disk(disk);
191 blk_register_queue(disk); 191 blk_register_queue(disk);
192 blk_register_filter(disk);
192 193
193 bdi = &disk->queue->backing_dev_info; 194 bdi = &disk->queue->backing_dev_info;
194 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); 195 bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
200 201
201void unlink_gendisk(struct gendisk *disk) 202void unlink_gendisk(struct gendisk *disk)
202{ 203{
204 blk_unregister_filter(disk);
203 sysfs_remove_link(&disk->dev.kobj, "bdi"); 205 sysfs_remove_link(&disk->dev.kobj, "bdi");
204 bdi_unregister(&disk->queue->backing_dev_info); 206 bdi_unregister(&disk->queue->backing_dev_info);
205 blk_unregister_queue(disk); 207 blk_unregister_queue(disk);
@@ -400,6 +402,14 @@ static ssize_t disk_removable_show(struct device *dev,
400 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); 402 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
401} 403}
402 404
405static ssize_t disk_ro_show(struct device *dev,
406 struct device_attribute *attr, char *buf)
407{
408 struct gendisk *disk = dev_to_disk(dev);
409
410 return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
411}
412
403static ssize_t disk_size_show(struct device *dev, 413static ssize_t disk_size_show(struct device *dev,
404 struct device_attribute *attr, char *buf) 414 struct device_attribute *attr, char *buf)
405{ 415{
@@ -472,6 +482,7 @@ static ssize_t disk_fail_store(struct device *dev,
472 482
473static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); 483static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
474static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); 484static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
485static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
475static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); 486static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
476static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 487static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
477static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); 488static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
@@ -483,6 +494,7 @@ static struct device_attribute dev_attr_fail =
483static struct attribute *disk_attrs[] = { 494static struct attribute *disk_attrs[] = {
484 &dev_attr_range.attr, 495 &dev_attr_range.attr,
485 &dev_attr_removable.attr, 496 &dev_attr_removable.attr,
497 &dev_attr_ro.attr,
486 &dev_attr_size.attr, 498 &dev_attr_size.attr,
487 &dev_attr_capability.attr, 499 &dev_attr_capability.attr,
488 &dev_attr_stat.attr, 500 &dev_attr_stat.attr,
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 78199c08ec92..c5b9bcfc0a6d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
105 return put_user(1, p); 105 return put_user(1, p);
106} 106}
107 107
108#define CMD_READ_SAFE 0x01
109#define CMD_WRITE_SAFE 0x02
110#define CMD_WARNED 0x04
111#define safe_for_read(cmd) [cmd] = CMD_READ_SAFE
112#define safe_for_write(cmd) [cmd] = CMD_WRITE_SAFE
113
114int blk_verify_command(unsigned char *cmd, int has_write_perm)
115{
116 static unsigned char cmd_type[256] = {
117
118 /* Basic read-only commands */
119 safe_for_read(TEST_UNIT_READY),
120 safe_for_read(REQUEST_SENSE),
121 safe_for_read(READ_6),
122 safe_for_read(READ_10),
123 safe_for_read(READ_12),
124 safe_for_read(READ_16),
125 safe_for_read(READ_BUFFER),
126 safe_for_read(READ_DEFECT_DATA),
127 safe_for_read(READ_LONG),
128 safe_for_read(INQUIRY),
129 safe_for_read(MODE_SENSE),
130 safe_for_read(MODE_SENSE_10),
131 safe_for_read(LOG_SENSE),
132 safe_for_read(START_STOP),
133 safe_for_read(GPCMD_VERIFY_10),
134 safe_for_read(VERIFY_16),
135
136 /* Audio CD commands */
137 safe_for_read(GPCMD_PLAY_CD),
138 safe_for_read(GPCMD_PLAY_AUDIO_10),
139 safe_for_read(GPCMD_PLAY_AUDIO_MSF),
140 safe_for_read(GPCMD_PLAY_AUDIO_TI),
141 safe_for_read(GPCMD_PAUSE_RESUME),
142
143 /* CD/DVD data reading */
144 safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
145 safe_for_read(GPCMD_READ_CD),
146 safe_for_read(GPCMD_READ_CD_MSF),
147 safe_for_read(GPCMD_READ_DISC_INFO),
148 safe_for_read(GPCMD_READ_CDVD_CAPACITY),
149 safe_for_read(GPCMD_READ_DVD_STRUCTURE),
150 safe_for_read(GPCMD_READ_HEADER),
151 safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
152 safe_for_read(GPCMD_READ_SUBCHANNEL),
153 safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
154 safe_for_read(GPCMD_REPORT_KEY),
155 safe_for_read(GPCMD_SCAN),
156 safe_for_read(GPCMD_GET_CONFIGURATION),
157 safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
158 safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
159 safe_for_read(GPCMD_GET_PERFORMANCE),
160 safe_for_read(GPCMD_SEEK),
161 safe_for_read(GPCMD_STOP_PLAY_SCAN),
162
163 /* Basic writing commands */
164 safe_for_write(WRITE_6),
165 safe_for_write(WRITE_10),
166 safe_for_write(WRITE_VERIFY),
167 safe_for_write(WRITE_12),
168 safe_for_write(WRITE_VERIFY_12),
169 safe_for_write(WRITE_16),
170 safe_for_write(WRITE_LONG),
171 safe_for_write(WRITE_LONG_2),
172 safe_for_write(ERASE),
173 safe_for_write(GPCMD_MODE_SELECT_10),
174 safe_for_write(MODE_SELECT),
175 safe_for_write(LOG_SELECT),
176 safe_for_write(GPCMD_BLANK),
177 safe_for_write(GPCMD_CLOSE_TRACK),
178 safe_for_write(GPCMD_FLUSH_CACHE),
179 safe_for_write(GPCMD_FORMAT_UNIT),
180 safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
181 safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
182 safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
183 safe_for_write(GPCMD_SEND_EVENT),
184 safe_for_write(GPCMD_SEND_KEY),
185 safe_for_write(GPCMD_SEND_OPC),
186 safe_for_write(GPCMD_SEND_CUE_SHEET),
187 safe_for_write(GPCMD_SET_SPEED),
188 safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
189 safe_for_write(GPCMD_LOAD_UNLOAD),
190 safe_for_write(GPCMD_SET_STREAMING),
191 };
192 unsigned char type = cmd_type[cmd[0]];
193
194 /* Anybody who can open the device can do a read-safe command */
195 if (type & CMD_READ_SAFE)
196 return 0;
197
198 /* Write-safe commands just require a writable open.. */
199 if ((type & CMD_WRITE_SAFE) && has_write_perm)
200 return 0;
201
202 /* And root can do any command.. */
203 if (capable(CAP_SYS_RAWIO))
204 return 0;
205
206 if (!type) {
207 cmd_type[cmd[0]] = CMD_WARNED;
208 printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
209 }
210
211 /* Otherwise fail it with an "Operation not permitted" */
212 return -EPERM;
213}
214EXPORT_SYMBOL_GPL(blk_verify_command);
215
216static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, 108static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
217 struct sg_io_hdr *hdr, int has_write_perm) 109 struct sg_io_hdr *hdr, struct file *file)
218{ 110{
219 if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) 111 if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
220 return -EFAULT; 112 return -EFAULT;
221 if (blk_verify_command(rq->cmd, has_write_perm)) 113 if (blk_verify_command(file, rq->cmd))
222 return -EPERM; 114 return -EPERM;
223 115
224 /* 116 /*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
287 struct gendisk *bd_disk, struct sg_io_hdr *hdr) 179 struct gendisk *bd_disk, struct sg_io_hdr *hdr)
288{ 180{
289 unsigned long start_time; 181 unsigned long start_time;
290 int writing = 0, ret = 0, has_write_perm = 0; 182 int writing = 0, ret = 0;
291 struct request *rq; 183 struct request *rq;
292 char sense[SCSI_SENSE_BUFFERSIZE]; 184 char sense[SCSI_SENSE_BUFFERSIZE];
293 struct bio *bio; 185 struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct request_queue *q,
316 if (!rq) 208 if (!rq)
317 return -ENOMEM; 209 return -ENOMEM;
318 210
319 if (file) 211 if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
320 has_write_perm = file->f_mode & FMODE_WRITE;
321
322 if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
323 blk_put_request(rq); 212 blk_put_request(rq);
324 return -EFAULT; 213 return -EFAULT;
325 } 214 }
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
451 if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) 340 if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
452 goto error; 341 goto error;
453 342
454 err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE); 343 err = blk_verify_command(file, rq->cmd);
455 if (err) 344 if (err)
456 goto error; 345 goto error;
457 346