Diffstat (limited to 'block')
-rw-r--r--  block/Kconfig          11
-rw-r--r--  block/Makefile          2
-rw-r--r--  block/blk-core.c       33
-rw-r--r--  block/blk-integrity.c   1
-rw-r--r--  block/blk-merge.c       6
-rw-r--r--  block/blk-settings.c   84
-rw-r--r--  block/blk-sysfs.c      11
-rw-r--r--  block/bsg.c             2
-rw-r--r--  block/cfq-iosched.c   178
-rw-r--r--  block/cmd-filter.c    233
-rw-r--r--  block/elevator.c       13
-rw-r--r--  block/scsi_ioctl.c     44
12 files changed, 233 insertions, 385 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 95a86adc33a1..9be0b56eaee1 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -48,9 +48,9 @@ config LBDAF
 	  If unsure, say Y.
 
 config BLK_DEV_BSG
-	bool "Block layer SG support v4 (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	---help---
+	bool "Block layer SG support v4"
+	default y
+	help
 	  Saying Y here will enable generic SG (SCSI generic) v4 support
 	  for any block device.
 
@@ -60,7 +60,10 @@ config BLK_DEV_BSG
 	  protocols (e.g. Task Management Functions and SMP in Serial
 	  Attached SCSI).
 
-	  If unsure, say N.
+	  This option is required by recent UDEV versions to properly
+	  access device serial numbers, etc.
+
+	  If unsure, say Y.
 
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
diff --git a/block/Makefile b/block/Makefile
index e9fa4dd690f2..6c54ed0ff755 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
+			ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b06cf5c2a829..e3299a77a0d8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -575,13 +575,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 		return NULL;
 	}
 
-	/*
-	 * if caller didn't supply a lock, they get per-queue locking with
-	 * our embedded lock
-	 */
-	if (!lock)
-		lock = &q->__queue_lock;
-
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
@@ -595,8 +588,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 
 	q->sg_reserved_size = INT_MAX;
 
-	blk_set_cmd_filter_defaults(&q->cmd_filter);
-
 	/*
 	 * all done
 	 */
@@ -1172,6 +1163,11 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
+	if (bio_barrier(bio) && bio_has_data(bio) &&
+	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
+	}
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
@@ -1472,11 +1468,6 @@ static inline void __generic_make_request(struct bio *bio)
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
-		if (bio_barrier(bio) && bio_has_data(bio) &&
-		    (q->next_ordered == QUEUE_ORDERED_NONE)) {
-			err = -EOPNOTSUPP;
-			goto end_io;
-		}
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -2145,7 +2136,7 @@ bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
-EXPORT_SYMBOL_GPL(blk_end_request);
+EXPORT_SYMBOL(blk_end_request);
 
 /**
  * blk_end_request_all - Helper function for drives to finish the request.
@@ -2166,7 +2157,7 @@ void blk_end_request_all(struct request *rq, int error)
 	pending = blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
-EXPORT_SYMBOL_GPL(blk_end_request_all);
+EXPORT_SYMBOL(blk_end_request_all);
 
 /**
  * blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2184,7 +2175,7 @@ bool blk_end_request_cur(struct request *rq, int error)
 {
 	return blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
-EXPORT_SYMBOL_GPL(blk_end_request_cur);
+EXPORT_SYMBOL(blk_end_request_cur);
 
 /**
  * __blk_end_request - Helper function for drivers to complete the request.
@@ -2203,7 +2194,7 @@ bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
 {
 	return __blk_end_bidi_request(rq, error, nr_bytes, 0);
 }
-EXPORT_SYMBOL_GPL(__blk_end_request);
+EXPORT_SYMBOL(__blk_end_request);
 
 /**
  * __blk_end_request_all - Helper function for drives to finish the request.
@@ -2224,7 +2215,7 @@ void __blk_end_request_all(struct request *rq, int error)
 	pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
 	BUG_ON(pending);
 }
-EXPORT_SYMBOL_GPL(__blk_end_request_all);
+EXPORT_SYMBOL(__blk_end_request_all);
 
 /**
  * __blk_end_request_cur - Helper function to finish the current request chunk.
@@ -2243,7 +2234,7 @@ bool __blk_end_request_cur(struct request *rq, int error)
 {
 	return __blk_end_request(rq, error, blk_rq_cur_bytes(rq));
 }
-EXPORT_SYMBOL_GPL(__blk_end_request_cur);
+EXPORT_SYMBOL(__blk_end_request_cur);
 
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
@@ -2365,7 +2356,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		__bio_clone(bio, bio_src);
 
 		if (bio_integrity(bio_src) &&
-		    bio_integrity_clone(bio, bio_src, gfp_mask))
+		    bio_integrity_clone(bio, bio_src, gfp_mask, bs))
 			goto free_and_out;
 
 		if (bio_ctr && bio_ctr(bio, bio_src, data))
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 73e28d355688..15c630813b1c 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -379,6 +379,7 @@ void blk_integrity_unregister(struct gendisk *disk)
 
 	kobject_uevent(&bi->kobj, KOBJ_REMOVE);
 	kobject_del(&bi->kobj);
+	kobject_put(&bi->kobj);
 	kmem_cache_free(integrity_cachep, bi);
 	disk->integrity = NULL;
 }
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 39ce64432ba6..e1999679a4d5 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -350,6 +350,12 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	if (blk_integrity_rq(req) != blk_integrity_rq(next))
 		return 0;
 
+	/* don't merge requests of different failfast settings */
+	if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
+	    blk_failfast_transport(req) != blk_failfast_transport(next) ||
+	    blk_failfast_driver(req) != blk_failfast_driver(next))
+		return 0;
+
 	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
diff --git a/block/blk-settings.c b/block/blk-settings.c
index bd582a7f5310..476d87065073 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -7,6 +7,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/gcd.h>
 
 #include "blk.h"
 
@@ -165,6 +166,13 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	blk_set_default_limits(&q->limits);
 
 	/*
+	 * If the caller didn't supply a lock, fall back to our embedded
+	 * per-queue locks
+	 */
+	if (!q->queue_lock)
+		q->queue_lock = &q->__queue_lock;
+
+	/*
 	 * by default assume old behaviour and bounce for any highmem page
 	 */
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
@@ -377,8 +385,8 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset)
 EXPORT_SYMBOL(blk_queue_alignment_offset);
 
 /**
- * blk_queue_io_min - set minimum request size for the queue
- * @q:  the request queue for the device
+ * blk_limits_io_min - set minimum request size for a device
+ * @limits: the queue limits
  * @min:  smallest I/O size in bytes
  *
  * Description:
@@ -387,15 +395,35 @@ EXPORT_SYMBOL(blk_queue_alignment_offset);
  *   smallest I/O the device can perform without incurring a performance
  *   penalty.
  */
-void blk_queue_io_min(struct request_queue *q, unsigned int min)
+void blk_limits_io_min(struct queue_limits *limits, unsigned int min)
 {
-	q->limits.io_min = min;
+	limits->io_min = min;
 
-	if (q->limits.io_min < q->limits.logical_block_size)
-		q->limits.io_min = q->limits.logical_block_size;
+	if (limits->io_min < limits->logical_block_size)
+		limits->io_min = limits->logical_block_size;
 
-	if (q->limits.io_min < q->limits.physical_block_size)
-		q->limits.io_min = q->limits.physical_block_size;
+	if (limits->io_min < limits->physical_block_size)
+		limits->io_min = limits->physical_block_size;
+}
+EXPORT_SYMBOL(blk_limits_io_min);
+
+/**
+ * blk_queue_io_min - set minimum request size for the queue
+ * @q:  the request queue for the device
+ * @min:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Storage devices may report a granularity or preferred minimum I/O
+ *   size which is the smallest request the device can perform without
+ *   incurring a performance penalty.  For disk drives this is often the
+ *   physical block size.  For RAID arrays it is often the stripe chunk
+ *   size.  A properly aligned multiple of minimum_io_size is the
+ *   preferred request size for workloads where a high number of I/O
+ *   operations is desired.
+ */
+void blk_queue_io_min(struct request_queue *q, unsigned int min)
+{
+	blk_limits_io_min(&q->limits, min);
 }
 EXPORT_SYMBOL(blk_queue_io_min);
 
@@ -405,8 +433,12 @@ EXPORT_SYMBOL(blk_queue_io_min);
  * @opt:  optimal request size in bytes
  *
  * Description:
- *   Drivers can call this function to set the preferred I/O request
- *   size for devices that report such a value.
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
  */
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
@@ -426,27 +458,7 @@ EXPORT_SYMBOL(blk_queue_io_opt);
  **/
 void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 {
-	/* zero is "infinity" */
-	t->limits.max_sectors = min_not_zero(queue_max_sectors(t),
-					     queue_max_sectors(b));
-
-	t->limits.max_hw_sectors = min_not_zero(queue_max_hw_sectors(t),
-						queue_max_hw_sectors(b));
-
-	t->limits.seg_boundary_mask = min_not_zero(queue_segment_boundary(t),
-						   queue_segment_boundary(b));
-
-	t->limits.max_phys_segments = min_not_zero(queue_max_phys_segments(t),
-						   queue_max_phys_segments(b));
-
-	t->limits.max_hw_segments = min_not_zero(queue_max_hw_segments(t),
-						 queue_max_hw_segments(b));
-
-	t->limits.max_segment_size = min_not_zero(queue_max_segment_size(t),
-						  queue_max_segment_size(b));
-
-	t->limits.logical_block_size = max(queue_logical_block_size(t),
-					   queue_logical_block_size(b));
+	blk_stack_limits(&t->limits, &b->limits, 0);
 
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
@@ -516,6 +528,16 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		return -1;
 	}
 
+	/* Find lcm() of optimal I/O size */
+	if (t->io_opt && b->io_opt)
+		t->io_opt = (t->io_opt * b->io_opt) / gcd(t->io_opt, b->io_opt);
+	else if (b->io_opt)
+		t->io_opt = b->io_opt;
+
+	/* Verify that optimal I/O size is a multiple of io_min */
+	if (t->io_min && t->io_opt % t->io_min)
+		return -1;
+
 	return 0;
 }
 EXPORT_SYMBOL(blk_stack_limits);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b1cd04087d6a..418d63619680 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -16,9 +16,9 @@ struct queue_sysfs_entry {
 };
 
 static ssize_t
-queue_var_show(unsigned int var, char *page)
+queue_var_show(unsigned long var, char *page)
 {
-	return sprintf(page, "%d\n", var);
+	return sprintf(page, "%lu\n", var);
 }
 
 static ssize_t
@@ -77,7 +77,8 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 
 static ssize_t queue_ra_show(struct request_queue *q, char *page)
 {
-	int ra_kb = q->backing_dev_info.ra_pages << (PAGE_CACHE_SHIFT - 10);
+	unsigned long ra_kb = q->backing_dev_info.ra_pages <<
+					(PAGE_CACHE_SHIFT - 10);
 
 	return queue_var_show(ra_kb, (page));
 }
@@ -189,9 +190,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 
 static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
 {
-	unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
+	bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
 
-	return queue_var_show(set != 0, page);
+	return queue_var_show(set, page);
 }
 
 static ssize_t
diff --git a/block/bsg.c b/block/bsg.c
index e7d475254248..5f184bb3ff9e 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 		return -EFAULT;
 
 	if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
-		if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm))
+		if (blk_verify_command(rq->cmd, has_write_perm))
 			return -EPERM;
 	} else if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 833ec18eaa63..fd7080ed7935 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -71,6 +71,51 @@ struct cfq_rb_root {
 #define CFQ_RB_ROOT	(struct cfq_rb_root) { RB_ROOT, NULL, }
 
 /*
+ * Per process-grouping structure
+ */
+struct cfq_queue {
+	/* reference count */
+	atomic_t ref;
+	/* various state flags, see below */
+	unsigned int flags;
+	/* parent cfq_data */
+	struct cfq_data *cfqd;
+	/* service_tree member */
+	struct rb_node rb_node;
+	/* service_tree key */
+	unsigned long rb_key;
+	/* prio tree member */
+	struct rb_node p_node;
+	/* prio tree root we belong to, if any */
+	struct rb_root *p_root;
+	/* sorted list of pending requests */
+	struct rb_root sort_list;
+	/* if fifo isn't expired, next request to serve */
+	struct request *next_rq;
+	/* requests queued in sort_list */
+	int queued[2];
+	/* currently allocated requests */
+	int allocated[2];
+	/* fifo list of requests in sort_list */
+	struct list_head fifo;
+
+	unsigned long slice_end;
+	long slice_resid;
+	unsigned int slice_dispatch;
+
+	/* pending metadata requests */
+	int meta_pending;
+	/* number of requests that are on the dispatch list or inside driver */
+	int dispatched;
+
+	/* io prio of this group */
+	unsigned short ioprio, org_ioprio;
+	unsigned short ioprio_class, org_ioprio_class;
+
+	pid_t pid;
+};
+
+/*
  * Per block device queue structure
  */
 struct cfq_data {
@@ -135,51 +180,11 @@ struct cfq_data {
 	unsigned int cfq_slice_idle;
 
 	struct list_head cic_list;
-};
 
-/*
- * Per process-grouping structure
- */
-struct cfq_queue {
-	/* reference count */
-	atomic_t ref;
-	/* various state flags, see below */
-	unsigned int flags;
-	/* parent cfq_data */
-	struct cfq_data *cfqd;
-	/* service_tree member */
-	struct rb_node rb_node;
-	/* service_tree key */
-	unsigned long rb_key;
-	/* prio tree member */
-	struct rb_node p_node;
-	/* prio tree root we belong to, if any */
-	struct rb_root *p_root;
-	/* sorted list of pending requests */
-	struct rb_root sort_list;
-	/* if fifo isn't expired, next request to serve */
-	struct request *next_rq;
-	/* requests queued in sort_list */
-	int queued[2];
-	/* currently allocated requests */
-	int allocated[2];
-	/* fifo list of requests in sort_list */
-	struct list_head fifo;
-
-	unsigned long slice_end;
-	long slice_resid;
-	unsigned int slice_dispatch;
-
-	/* pending metadata requests */
-	int meta_pending;
-	/* number of requests that are on the dispatch list or inside driver */
-	int dispatched;
-
-	/* io prio of this group */
-	unsigned short ioprio, org_ioprio;
-	unsigned short ioprio_class, org_ioprio_class;
-
-	pid_t pid;
+	/*
+	 * Fallback dummy cfqq for extreme OOM conditions
+	 */
+	struct cfq_queue oom_cfqq;
 };
 
 enum cfqq_state_flags {
@@ -1641,6 +1646,26 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
 	ioc->ioprio_changed = 0;
 }
 
+static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
+			  pid_t pid, int is_sync)
+{
+	RB_CLEAR_NODE(&cfqq->rb_node);
+	RB_CLEAR_NODE(&cfqq->p_node);
+	INIT_LIST_HEAD(&cfqq->fifo);
+
+	atomic_set(&cfqq->ref, 0);
+	cfqq->cfqd = cfqd;
+
+	cfq_mark_cfqq_prio_changed(cfqq);
+
+	if (is_sync) {
+		if (!cfq_class_idle(cfqq))
+			cfq_mark_cfqq_idle_window(cfqq);
+		cfq_mark_cfqq_sync(cfqq);
+	}
+	cfqq->pid = pid;
+}
+
 static struct cfq_queue *
 cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
 		     struct io_context *ioc, gfp_t gfp_mask)
@@ -1653,56 +1678,40 @@ retry:
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
 
-	if (!cfqq) {
+	/*
+	 * Always try a new alloc if we fell back to the OOM cfqq
+	 * originally, since it should just be a temporary situation.
+	 */
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
+		cfqq = NULL;
 		if (new_cfqq) {
 			cfqq = new_cfqq;
 			new_cfqq = NULL;
 		} else if (gfp_mask & __GFP_WAIT) {
-			/*
-			 * Inform the allocator of the fact that we will
-			 * just repeat this allocation if it fails, to allow
-			 * the allocator to do whatever it needs to attempt to
-			 * free memory.
-			 */
 			spin_unlock_irq(cfqd->queue->queue_lock);
 			new_cfqq = kmem_cache_alloc_node(cfq_pool,
-					gfp_mask | __GFP_NOFAIL | __GFP_ZERO,
+					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
 			spin_lock_irq(cfqd->queue->queue_lock);
-			goto retry;
+			if (new_cfqq)
+				goto retry;
 		} else {
 			cfqq = kmem_cache_alloc_node(cfq_pool,
 					gfp_mask | __GFP_ZERO,
 					cfqd->queue->node);
-			if (!cfqq)
-				goto out;
 		}
 
-		RB_CLEAR_NODE(&cfqq->rb_node);
-		RB_CLEAR_NODE(&cfqq->p_node);
-		INIT_LIST_HEAD(&cfqq->fifo);
-
-		atomic_set(&cfqq->ref, 0);
-		cfqq->cfqd = cfqd;
-
-		cfq_mark_cfqq_prio_changed(cfqq);
-
-		cfq_init_prio_data(cfqq, ioc);
-
-		if (is_sync) {
-			if (!cfq_class_idle(cfqq))
-				cfq_mark_cfqq_idle_window(cfqq);
-			cfq_mark_cfqq_sync(cfqq);
-		}
-		cfqq->pid = current->pid;
-		cfq_log_cfqq(cfqd, cfqq, "alloced");
+		if (cfqq) {
+			cfq_init_cfqq(cfqd, cfqq, current->pid, is_sync);
+			cfq_init_prio_data(cfqq, ioc);
+			cfq_log_cfqq(cfqd, cfqq, "alloced");
+		} else
+			cfqq = &cfqd->oom_cfqq;
 	}
 
 	if (new_cfqq)
 		kmem_cache_free(cfq_pool, new_cfqq);
 
-out:
-	WARN_ON((gfp_mask & __GFP_WAIT) && !cfqq);
 	return cfqq;
 }
 
@@ -1735,11 +1744,8 @@ cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
 		cfqq = *async_cfqq;
 	}
 
-	if (!cfqq) {
+	if (!cfqq)
 		cfqq = cfq_find_alloc_queue(cfqd, is_sync, ioc, gfp_mask);
-		if (!cfqq)
-			return NULL;
-	}
 
 	/*
 	 * pin the queue now that it's allocated, scheduler exit will prune it
@@ -2305,12 +2311,8 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 		goto queue_fail;
 
 	cfqq = cic_to_cfqq(cic, is_sync);
-	if (!cfqq) {
+	if (!cfqq || cfqq == &cfqd->oom_cfqq) {
 		cfqq = cfq_get_queue(cfqd, is_sync, cic->ioc, gfp_mask);
-
-		if (!cfqq)
-			goto queue_fail;
-
 		cic_set_cfqq(cic, cfqq, is_sync);
 	}
 
@@ -2465,6 +2467,14 @@ static void *cfq_init_queue(struct request_queue *q)
 	for (i = 0; i < CFQ_PRIO_LISTS; i++)
 		cfqd->prio_trees[i] = RB_ROOT;
 
+	/*
+	 * Our fallback cfqq if cfq_find_alloc_queue() runs into OOM issues.
+	 * Grab a permanent reference to it, so that the normal code flow
+	 * will not attempt to free it.
+	 */
+	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
+	atomic_inc(&cfqd->oom_cfqq.ref);
+
 	INIT_LIST_HEAD(&cfqd->cic_list);
 
 	cfqd->queue = q;
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
deleted file mode 100644
index 572bbc2f900d..000000000000
--- a/block/cmd-filter.c
+++ /dev/null
@@ -1,233 +0,0 @@
-/*
- * Copyright 2004 Peter M. Jones <pjones@redhat.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- *
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public Licens
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
- *
- */
-
-#include <linux/list.h>
-#include <linux/genhd.h>
-#include <linux/spinlock.h>
-#include <linux/capability.h>
-#include <linux/bitops.h>
-#include <linux/blkdev.h>
-
-#include <scsi/scsi.h>
-#include <linux/cdrom.h>
-
-int blk_verify_command(struct blk_cmd_filter *filter,
-		       unsigned char *cmd, fmode_t has_write_perm)
-{
-	/* root can do any command. */
-	if (capable(CAP_SYS_RAWIO))
-		return 0;
-
-	/* if there's no filter set, assume we're filtering everything out */
-	if (!filter)
-		return -EPERM;
-
-	/* Anybody who can open the device can do a read-safe command */
-	if (test_bit(cmd[0], filter->read_ok))
-		return 0;
-
-	/* Write-safe commands require a writable open */
-	if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
-		return 0;
-
-	return -EPERM;
-}
-EXPORT_SYMBOL(blk_verify_command);
-
-#if 0
-/* and now, the sysfs stuff */
-static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page,
-			     int rw)
-{
-	char *npage = page;
-	unsigned long *okbits;
-	int i;
-
-	if (rw == READ)
-		okbits = filter->read_ok;
-	else
-		okbits = filter->write_ok;
-
-	for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
-		if (test_bit(i, okbits)) {
-			npage += sprintf(npage, "0x%02x", i);
-			if (i < BLK_SCSI_MAX_CMDS - 1)
-				sprintf(npage++, " ");
-		}
-	}
-
-	if (npage != page)
-		npage += sprintf(npage, "\n");
-
-	return npage - page;
-}
-
-static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page)
-{
-	return rcf_cmds_show(filter, page, READ);
-}
-
-static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter,
-				  char *page)
-{
-	return rcf_cmds_show(filter, page, WRITE);
-}
-
-static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter,
-			      const char *page, size_t count, int rw)
-{
-	unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
-	int cmd, set;
-	char *p, *status;
-
-	if (rw == READ) {
-		memcpy(&okbits, filter->read_ok, sizeof(okbits));
-		target_okbits = filter->read_ok;
-	} else {
-		memcpy(&okbits, filter->write_ok, sizeof(okbits));
-		target_okbits = filter->write_ok;
-	}
-
-	while ((p = strsep((char **)&page, " ")) != NULL) {
-		set = 1;
-
-		if (p[0] == '+') {
-			p++;
-		} else if (p[0] == '-') {
-			set = 0;
-			p++;
-		}
-
-		cmd = simple_strtol(p, &status, 16);
-
-		/* either of these cases means invalid input, so do nothing. */
-		if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS)
-			return -EINVAL;
-
-		if (set)
-			__set_bit(cmd, okbits);
-		else
-			__clear_bit(cmd, okbits);
-	}
-
-	memcpy(target_okbits, okbits, sizeof(okbits));
-	return count;
-}
-
-static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter,
-				  const char *page, size_t count)
-{
-	return rcf_cmds_store(filter, page, count, READ);
-}
-
-static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter,
-				   const char *page, size_t count)
-{
-	return rcf_cmds_store(filter, page, count, WRITE);
-}
-
-struct rcf_sysfs_entry {
-	struct attribute attr;
-	ssize_t (*show)(struct blk_cmd_filter *, char *);
-	ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t);
-};
-
-static struct rcf_sysfs_entry rcf_readcmds_entry = {
-	.attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
-	.show = rcf_readcmds_show,
-	.store = rcf_readcmds_store,
-};
-
-static struct rcf_sysfs_entry rcf_writecmds_entry = {
-	.attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
-	.show = rcf_writecmds_show,
-	.store = rcf_writecmds_store,
-};
-
-static struct attribute *default_attrs[] = {
-	&rcf_readcmds_entry.attr,
-	&rcf_writecmds_entry.attr,
-	NULL,
-};
-
-#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
-
-static ssize_t
-rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
-{
-	struct rcf_sysfs_entry *entry = to_rcf(attr);
-	struct blk_cmd_filter *filter;
-
-	filter = container_of(kobj, struct blk_cmd_filter, kobj);
-	if (entry->show)
-		return entry->show(filter, page);
-
-	return 0;
-}
-
-static ssize_t
-rcf_attr_store(struct kobject *kobj, struct attribute *attr,
-	       const char *page, size_t length)
-{
-	struct rcf_sysfs_entry *entry = to_rcf(attr);
-	struct blk_cmd_filter *filter;
-
-	if (!capable(CAP_SYS_RAWIO))
-		return -EPERM;
-
-	if (!entry->store)
-		return -EINVAL;
-
-	filter = container_of(kobj, struct blk_cmd_filter, kobj);
-	return entry->store(filter, page, length);
-}
-
-static struct sysfs_ops rcf_sysfs_ops = {
-	.show = rcf_attr_show,
-	.store = rcf_attr_store,
-};
-
-static struct kobj_type rcf_ktype = {
-	.sysfs_ops = &rcf_sysfs_ops,
-	.default_attrs = default_attrs,
-};
-
-int blk_register_filter(struct gendisk *disk)
-{
-	int ret;
-	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
-	ret = kobject_init_and_add(&filter->kobj, &rcf_ktype,
-				   &disk_to_dev(disk)->kobj,
-				   "%s", "cmd_filter");
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-EXPORT_SYMBOL(blk_register_filter);
-
-void blk_unregister_filter(struct gendisk *disk)
-{
-	struct blk_cmd_filter *filter = &disk->queue->cmd_filter;
-
-	kobject_put(&filter->kobj);
-}
-EXPORT_SYMBOL(blk_unregister_filter);
-#endif
diff --git a/block/elevator.c b/block/elevator.c
index ca861927ba41..2d511f9105e1 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -100,6 +100,19 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (bio_integrity(bio) != blk_integrity_rq(rq))
 		return 0;
 
+	/*
+	 * Don't merge if failfast settings don't match.
+	 *
+	 * FIXME: The negation in front of each condition is necessary
+	 * because bio and request flags use different bit positions
+	 * and the accessors return those bits directly.  This
+	 * ugliness will soon go away.
+	 */
+	if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) ||
+	    !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
+	    !bio_failfast_driver(bio) != !blk_failfast_driver(rq))
+		return 0;
+
 	if (!elv_iosched_allow_merge(rq, bio))
 		return 0;
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 5f8e798ede4e..e5b10017a50b 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -32,6 +32,11 @@
 #include <scsi/scsi_ioctl.h>
 #include <scsi/scsi_cmnd.h>
 
+struct blk_cmd_filter {
+	unsigned long read_ok[BLK_SCSI_CMD_PER_LONG];
+	unsigned long write_ok[BLK_SCSI_CMD_PER_LONG];
+} blk_default_cmd_filter;
+
 /* Command group 3 is reserved and should never be used.  */
 const unsigned char scsi_command_size_tbl[8] =
 {
@@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
 	return put_user(1, p);
 }
 
-void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
+static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
 {
 	/* Basic read-only commands */
 	__set_bit(TEST_UNIT_READY, filter->read_ok);
@@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
 	__set_bit(GPCMD_SET_STREAMING, filter->write_ok);
 	__set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok);
 }
-EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults);
+
+int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
+{
+	struct blk_cmd_filter *filter = &blk_default_cmd_filter;
+
+	/* root can do any command. */
+	if (capable(CAP_SYS_RAWIO))
+		return 0;
+
+	/* if there's no filter set, assume we're filtering everything out */
+	if (!filter)
+		return -EPERM;
+
+	/* Anybody who can open the device can do a read-safe command */
+	if (test_bit(cmd[0], filter->read_ok))
+		return 0;
+
+	/* Write-safe commands require a writable open */
+	if (test_bit(cmd[0], filter->write_ok) && has_write_perm)
+		return 0;
+
+	return -EPERM;
+}
+EXPORT_SYMBOL(blk_verify_command);
 
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 			     struct sg_io_hdr *hdr, fmode_t mode)
 {
 	if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
 		return -EFAULT;
-	if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE))
+	if (blk_verify_command(rq->cmd, mode & FMODE_WRITE))
 		return -EPERM;
 
 	/*
@@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
 		goto error;
 
-	err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE);
+	err = blk_verify_command(rq->cmd, mode & FMODE_WRITE);
 	if (err)
 		goto error;
 
@@ -645,5 +673,11 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod
 	blk_put_queue(q);
 	return err;
 }
-
 EXPORT_SYMBOL(scsi_cmd_ioctl);
+
+int __init blk_scsi_ioctl_init(void)
+{
+	blk_set_cmd_filter_defaults(&blk_default_cmd_filter);
+	return 0;
+}
+fs_initcall(blk_scsi_ioctl_init);