-rw-r--r--  Documentation/block/biodoc.txt | 6
-rw-r--r--  arch/alpha/include/asm/io.h | 3
-rw-r--r--  arch/s390/mm/pgtable.c | 4
-rw-r--r--  block/Kconfig | 23
-rw-r--r--  block/as-iosched.c | 10
-rw-r--r--  block/blk-barrier.c | 120
-rw-r--r--  block/blk-core.c | 63
-rw-r--r--  block/blk-settings.c | 6
-rw-r--r--  block/blk-softirq.c | 2
-rw-r--r--  block/blk-sysfs.c | 7
-rw-r--r--  block/blk-tag.c | 1
-rw-r--r--  block/blk-timeout.c | 21
-rw-r--r--  block/cfq-iosched.c | 26
-rw-r--r--  block/compat_ioctl.c | 2
-rw-r--r--  block/deadline-iosched.c | 6
-rw-r--r--  block/elevator.c | 73
-rw-r--r--  block/genhd.c | 23
-rw-r--r--  block/ioctl.c | 2
-rw-r--r--  block/noop-iosched.c | 2
-rw-r--r--  block/scsi_ioctl.c | 4
-rw-r--r--  drivers/block/cciss.c | 88
-rw-r--r--  drivers/block/cciss.h | 4
-rw-r--r--  drivers/block/cciss_cmd.h | 3
-rw-r--r--  drivers/block/loop.c | 39
-rw-r--r--  drivers/block/nbd.c | 10
-rw-r--r--  drivers/block/virtio_blk.c | 2
-rw-r--r--  drivers/block/xen-blkfront.c | 8
-rw-r--r--  drivers/cdrom/cdrom.c | 703
-rw-r--r--  drivers/md/dm-crypt.c | 2
-rw-r--r--  drivers/md/dm-io.c | 2
-rw-r--r--  drivers/md/dm.c | 2
-rw-r--r--  fs/aio.c | 100
-rw-r--r--  fs/bio-integrity.c | 2
-rw-r--r--  fs/bio.c | 320
-rw-r--r--  fs/buffer.c | 19
-rw-r--r--  fs/ext4/super.c | 8
-rw-r--r--  include/linux/aio.h | 5
-rw-r--r--  include/linux/bio.h | 26
-rw-r--r--  include/linux/blkdev.h | 52
-rw-r--r--  include/linux/buffer_head.h | 1
-rw-r--r--  include/linux/elevator.h | 8
-rw-r--r--  include/linux/genhd.h | 1
-rw-r--r--  include/linux/mm_types.h | 5
-rw-r--r--  include/linux/types.h | 11
-rw-r--r--  kernel/exit.c | 2
-rw-r--r--  kernel/fork.c | 4
-rw-r--r--  mm/bounce.c | 9
47 files changed, 1089 insertions, 751 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 4dbb8be1c991..3c5434c83daf 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -914,7 +914,7 @@ I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
 queue and specific I/O schedulers. Unless stated otherwise, elevator is used
 to refer to both parts and I/O scheduler to specific I/O schedulers.
 
-Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c.
+Block layer implements generic dispatch queue in block/*.c.
 The generic dispatch queue is responsible for properly ordering barrier
 requests, requeueing, handling non-fs requests and all other subtleties.
 
@@ -926,8 +926,8 @@ be built inside the kernel. Each queue can choose different one and can also
 change to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
-xxx and xxx might not match exactly, but use your imagination. If an elevator
+calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
+and xxx might not match exactly, but use your imagination. If an elevator
 doesn't implement a function, the switch does nothing or some minimal house
 keeping work.
 
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index e971ab000f95..eda9b909aa05 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -96,9 +96,6 @@ static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page)
 	return page_to_phys(page);
 }
 
-/* This depends on working iommu. */
-#define BIO_VMERGE_BOUNDARY	(alpha_mv.mv_pci_tbi ? PAGE_SIZE : 0)
-
 /* Maximum PIO space address supported? */
 #define IO_SPACE_LIMIT 0xffff
 
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ef3635b52fc0..0767827540b1 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -263,7 +263,7 @@ int s390_enable_sie(void)
 	/* lets check if we are allowed to replace the mm */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		task_unlock(tsk);
 		return -EINVAL;
 	}
@@ -279,7 +279,7 @@ int s390_enable_sie(void)
 	/* Now lets check again if something happened */
 	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
 		mmput(mm);
 		task_unlock(tsk);
 		return -EINVAL;
diff --git a/block/Kconfig b/block/Kconfig
index 290b219fad9c..ac0956f77785 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
 if BLOCK
 
 config LBD
-	bool "Support for Large Block Devices"
+	bool "Support for large block devices and files"
 	depends on !64BIT
 	help
-	  Enable block devices of size 2TB and larger.
+	  Enable block devices or files of size 2TB and larger.
 
 	  This option is required to support the full capacity of large
 	  (2TB+) block devices, including RAID, disk, Network Block Device,
 	  Logical Volume Manager (LVM) and loopback.
 
-	  For example, RAID devices are frequently bigger than the capacity
-	  of the largest individual hard drive.
-
-	  This option is not required if you have individual disk drives
-	  which total 2TB+ and you are not aggregating the capacity into
-	  a large block device (e.g. using RAID or LVM).
+	  This option also enables support for single files larger than
+	  2TB.
 
 	  If unsure, say N.
 
@@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE
 
 	  If unsure, say N.
 
-config LSF
-	bool "Support for Large Single Files"
-	depends on !64BIT
-	help
-	  Say Y here if you want to be able to handle very large files (2TB
-	  and larger), otherwise say N.
-
-	  If unsure, say Y.
-
 config BLK_DEV_BSG
 	bool "Block layer SG support v4 (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219ee..631f6f44460a 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,12 +1339,12 @@ static int as_may_queue(struct request_queue *q, int rw)
 	return ret;
 }
 
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
 {
 	struct as_data *ad = e->elevator_data;
 
 	del_timer_sync(&ad->antic_timer);
-	kblockd_flush_work(&ad->antic_work);
+	cancel_work_sync(&ad->antic_work);
 
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
 	BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
 	return count;
 }
 
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
 {
 	struct as_data *ad = e->elevator_data;
 	int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
 { \
 	struct as_data *ad = e->elevator_data; \
 	return as_var_show(jiffies_to_msecs((__VAR)), (page)); \
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
 { \
 	struct as_data *ad = e->elevator_data; \
 	int ret = as_var_store(__PTR, (page), count); \
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6e72d661ae42..8eba4e43bb0c 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@
 int blk_queue_ordered(struct request_queue *q, unsigned ordered,
 		      prepare_flush_fn *prepare_flush_fn)
 {
-	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-	    prepare_flush_fn == NULL) {
+	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+					     QUEUE_ORDERED_DO_POSTFLUSH))) {
 		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
 		return -EINVAL;
 	}
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
 		return QUEUE_ORDSEQ_DONE;
 }
 
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 {
 	struct request *rq;
 
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	q->ordseq |= seq;
 
 	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return;
+		return false;
 
 	/*
 	 * Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 
 	if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
 		BUG();
+
+	return true;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -134,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	struct request *rq;
 	rq_end_io_fn *end_io;
 
-	if (which == QUEUE_ORDERED_PREFLUSH) {
+	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
 		rq = &q->pre_flush_rq;
 		end_io = pre_flush_end_io;
 	} else {
@@ -151,80 +153,110 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 {
+	struct request *rq = *rqp;
+	unsigned skip = 0;
+
 	q->orderr = 0;
 	q->ordered = q->next_ordered;
 	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
 	/*
-	 * Prep proxy barrier request.
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary. Mask them off.
 	 */
+	if (!rq->hard_nr_sectors) {
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
+		/*
+		 * Empty barrier on a write-through device w/ ordered
+		 * tag has no command to issue and without any command
+		 * to issue, ordering by tag can't be used. Drain
+		 * instead.
+		 */
+		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+		}
+	}
+
+	/* stash away the original request */
 	elv_dequeue_request(q, rq);
 	q->orig_bar_rq = rq;
-	rq = &q->bar_rq;
-	blk_rq_init(q, rq);
-	if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-		rq->cmd_flags |= REQ_RW;
-	if (q->ordered & QUEUE_ORDERED_FUA)
-		rq->cmd_flags |= REQ_FUA;
-	init_request_from_bio(rq, q->orig_bar_rq->bio);
-	rq->end_io = bar_end_io;
+	rq = NULL;
 
 	/*
 	 * Queue ordered sequence. As we stack them at the head, we
 	 * need to queue in reverse order. Note that we rely on that
 	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence. If this request is
-	 * an empty barrier, we don't need to do a postflush ever since
-	 * there will be no data written between the pre and post flush.
-	 * Hence a single flush will suffice.
+	 * request gets inbetween ordered sequence.
 	 */
-	if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
-	else
-		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+		rq = &q->post_flush_rq;
+	} else
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
 
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+		rq = &q->bar_rq;
+
+		/* initialize proxy request and queue it */
+		blk_rq_init(q, rq);
+		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+			rq->cmd_flags |= REQ_RW;
+		if (q->ordered & QUEUE_ORDERED_DO_FUA)
+			rq->cmd_flags |= REQ_FUA;
+		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->end_io = bar_end_io;
 
-	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	} else
+		skip |= QUEUE_ORDSEQ_BAR;
+
+	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
 		rq = &q->pre_flush_rq;
 	} else
-		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
-		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-	else
+	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
 		rq = NULL;
+	else
+		skip |= QUEUE_ORDSEQ_DRAIN;
+
+	*rqp = rq;
 
-	return rq;
+	/*
+	 * Complete skipped sequences. If whole sequence is complete,
+	 * return false to tell elevator that this request is gone.
+	 */
+	return !blk_ordered_complete_seq(q, skip, 0);
 }
 
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
 	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
-			return 1;
+			return true;
 
-		if (q->next_ordered != QUEUE_ORDERED_NONE) {
-			*rqp = start_ordered(q, rq);
-			return 1;
-		} else {
+		if (q->next_ordered != QUEUE_ORDERED_NONE)
+			return start_ordered(q, rqp);
+		else {
 			/*
-			 * This can happen when the queue switches to
-			 * ORDERED_NONE while this request is on it.
+			 * Queue ordering not supported. Terminate
+			 * with prejudice.
 			 */
 			elv_dequeue_request(q, rq);
 			if (__blk_end_request(rq, -EOPNOTSUPP,
 					      blk_rq_bytes(rq)))
 				BUG();
 			*rqp = NULL;
-			return 0;
+			return false;
 		}
 	}
 
@@ -235,9 +267,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 	/* Special requests are not subject to ordering rules. */
 	if (!blk_fs_request(rq) &&
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return 1;
+		return true;
 
-	if (q->ordered & QUEUE_ORDERED_TAG) {
+	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
 		/* Ordered by tag. Blocking the next barrier is enough. */
 		if (is_barrier && rq != &q->bar_rq)
 			*rqp = NULL;
@@ -248,7 +280,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
 			*rqp = NULL;
 	}
 
-	return 1;
+	return true;
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index 561e8a1b43a4..a824e49c0d0a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		nbytes = bio->bi_size;
 	}
 
+	if (unlikely(rq->cmd_flags & REQ_QUIET))
+		set_bit(BIO_QUIET, &bio->bi_flags);
+
 	bio->bi_size -= nbytes;
 	bio->bi_sector += (nbytes >> 9);
 
@@ -265,8 +268,7 @@ void __generic_unplug_device(struct request_queue *q)
 {
 	if (unlikely(blk_queue_stopped(q)))
 		return;
-
-	if (!blk_remove_plug(q))
+	if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
 		return;
 
 	q->request_fn(q);
@@ -404,7 +406,8 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->unplug_timer);
-	kblockd_flush_work(&q->unplug_work);
+	del_timer_sync(&q->timeout);
+	cancel_work_sync(&q->unplug_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -1135,7 +1138,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
 	struct request *req;
-	int el_ret, nr_sectors, barrier, discard, err;
+	int el_ret, nr_sectors;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
 	int rw_flags;
@@ -1149,22 +1152,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	 */
 	blk_queue_bounce(q, &bio);
 
-	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && bio_has_data(bio) &&
-	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
-		err = -EOPNOTSUPP;
-		goto end_io;
-	}
-
-	discard = bio_discard(bio);
-	if (unlikely(discard) && !q->prepare_discard_fn) {
-		err = -EOPNOTSUPP;
-		goto end_io;
-	}
-
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(barrier) || elv_queue_empty(q))
+	if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -1250,18 +1240,14 @@ get_rq:
 	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
 	    bio_flagged(bio, BIO_CPU_AFFINE))
 		req->cpu = blk_cpu_to_group(smp_processor_id());
-	if (elv_queue_empty(q))
+	if (!blk_queue_nonrot(q) && elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
-	if (sync)
+	if (sync || blk_queue_nonrot(q))
 		__generic_unplug_device(q);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
-
-end_io:
-	bio_endio(bio, err);
-	return 0;
 }
 
 /*
@@ -1414,15 +1400,13 @@ static inline void __generic_make_request(struct bio *bio)
 		char b[BDEVNAME_SIZE];
 
 		q = bdev_get_queue(bio->bi_bdev);
-		if (!q) {
+		if (unlikely(!q)) {
 			printk(KERN_ERR
 			       "generic_make_request: Trying to access "
 				"nonexistent block-device %s (%Lu)\n",
 				bdevname(bio->bi_bdev, b),
 				(long long) bio->bi_sector);
-end_io:
-			bio_endio(bio, err);
-			break;
+			goto end_io;
 		}
 
 		if (unlikely(nr_sectors > q->max_hw_sectors)) {
@@ -1459,14 +1443,19 @@ end_io:
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
-		if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
-		    (bio_discard(bio) && !q->prepare_discard_fn)) {
+
+		if (bio_discard(bio) && !q->prepare_discard_fn) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
+
+	return;
+
+end_io:
+	bio_endio(bio, err);
 }
 
 /*
@@ -1716,14 +1705,6 @@ static int __end_that_request_first(struct request *req, int error,
 	while ((bio = req->bio) != NULL) {
 		int nbytes;
 
-		/*
-		 * For an empty barrier request, the low level driver must
-		 * store a potential error location in ->sector. We pass
-		 * that back up in ->bi_sector.
-		 */
-		if (blk_empty_barrier(req))
-			bio->bi_sector = req->sector;
-
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
@@ -2143,12 +2124,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-void kblockd_flush_work(struct work_struct *work)
-{
-	cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
 int __init blk_dev_init(void)
 {
 	kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/blk-settings.c b/block/blk-settings.c
index afa55e14e278..59fd05d9f1d5 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -319,9 +319,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
 
-	t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
-	t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
-	t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
+	t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
+	t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
+	t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
 	t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
 	if (!t->queue_lock)
 		WARN_ON_ONCE(1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index e660d26ca656..ce0efc6b26dc 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -161,7 +161,7 @@ void blk_complete_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_complete_request);
 
-__init int blk_softirq_init(void)
+static __init int blk_softirq_init(void)
 {
 	int i;
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 21e275d7eed9..a29cb788e408 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -88,9 +88,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
 	unsigned long ra_kb;
 	ssize_t ret = queue_var_store(&ra_kb, page, count);
 
-	spin_lock_irq(q->queue_lock);
 	q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
-	spin_unlock_irq(q->queue_lock);
 
 	return ret;
 }
@@ -117,10 +115,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 
 	if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
 		return -EINVAL;
-	/*
-	 * Take the queue lock to update the readahead and max_sectors
-	 * values synchronously:
-	 */
+
 	spin_lock_irq(q->queue_lock);
 	q->max_sectors = max_sectors_kb << 1;
 	spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c0d419e84ce7..3c518e3303ae 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -158,7 +158,6 @@ fail:
 /**
  * blk_init_tags - initialize the tag info for an external tag map
  * @depth:	the maximum queue depth supported
- * @tags: the tag to use
  **/
 struct blk_queue_tag *blk_init_tags(int depth)
 {
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 69185ea9fae2..a09535377a94 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -73,11 +73,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
  */
 void blk_delete_timer(struct request *req)
 {
-	struct request_queue *q = req->q;
-
 	list_del_init(&req->timeout_list);
-	if (list_empty(&q->timeout_list))
-		del_timer(&q->timeout);
 }
 
 static void blk_rq_timed_out(struct request *req)
@@ -111,7 +107,7 @@ static void blk_rq_timed_out(struct request *req)
 void blk_rq_timed_out_timer(unsigned long data)
 {
 	struct request_queue *q = (struct request_queue *) data;
-	unsigned long flags, uninitialized_var(next), next_set = 0;
+	unsigned long flags, next = 0;
 	struct request *rq, *tmp;
 
 	spin_lock_irqsave(q->queue_lock, flags);
@@ -126,15 +122,18 @@ void blk_rq_timed_out_timer(unsigned long data)
 			if (blk_mark_rq_complete(rq))
 				continue;
 			blk_rq_timed_out(rq);
+		} else {
+			if (!next || time_after(next, rq->deadline))
+				next = rq->deadline;
 		}
-		if (!next_set) {
-			next = rq->deadline;
-			next_set = 1;
-		} else if (time_after(next, rq->deadline))
-			next = rq->deadline;
 	}
 
-	if (next_set && !list_empty(&q->timeout_list))
+	/*
+	 * next can never be 0 here with the list non-empty, since we always
+	 * bump ->deadline to 1 so we can detect if the timer was ever added
+	 * or not. See comment in blk_add_timer()
+	 */
+	if (next)
 		mod_timer(&q->timeout, round_jiffies_up(next));
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd15..e8525fa72823 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1136,12 +1136,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 		if (cfq_class_idle(cfqq))
 			max_dispatch = 1;
 
-		if (cfqq->dispatched >= max_dispatch) {
-			if (cfqd->busy_queues > 1)
-				break;
-			if (cfqq->dispatched >= 4 * max_dispatch)
-				break;
-		}
+		if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1)
+			break;
 
 		if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
 			break;
@@ -1318,7 +1314,15 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
 		unsigned long flags;
 
 		spin_lock_irqsave(q->queue_lock, flags);
-		__cfq_exit_single_io_context(cfqd, cic);
+
+		/*
+		 * Ensure we get a fresh copy of the ->key to prevent
+		 * race between exiting task and queue
+		 */
+		smp_read_barrier_depends();
+		if (cic->key)
+			__cfq_exit_single_io_context(cfqd, cic);
+
 		spin_unlock_irqrestore(q->queue_lock, flags);
 	}
 }
@@ -2160,7 +2164,7 @@ out_cont:
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
 	del_timer_sync(&cfqd->idle_slice_timer);
-	kblockd_flush_work(&cfqd->unplug_work);
+	cancel_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2178,7 +2182,7 @@ static void cfq_put_async_queues(struct cfq_data *cfqd)
 		cfq_put_queue(cfqd->async_idle_cfqq);
 }
 
-static void cfq_exit_queue(elevator_t *e)
+static void cfq_exit_queue(struct elevator_queue *e)
 {
 	struct cfq_data *cfqd = e->elevator_data;
 	struct request_queue *q = cfqd->queue;
@@ -2288,7 +2292,7 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
 { \
 	struct cfq_data *cfqd = e->elevator_data; \
 	unsigned int __data = __VAR; \
@@ -2308,7 +2312,7 @@ SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
 { \
 	struct cfq_data *cfqd = e->elevator_data; \
 	unsigned int __data; \
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 67eb93cff699..f87615dea46b 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -774,9 +774,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 		bdi = blk_get_backing_dev_info(bdev);
 		if (bdi == NULL)
 			return -ENOTTY;
-		lock_kernel();
 		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
-		unlock_kernel();
 		return 0;
 	case BLKGETSIZE:
 		size = bdev->bd_inode->i_size;
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index fd311179f44c..c4d991d4adef 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -334,7 +334,7 @@ static int deadline_queue_empty(struct request_queue *q)
 		&& list_empty(&dd->fifo_list[READ]);
 }
 
-static void deadline_exit_queue(elevator_t *e)
+static void deadline_exit_queue(struct elevator_queue *e)
 {
 	struct deadline_data *dd = e->elevator_data;
 
@@ -387,7 +387,7 @@ deadline_var_store(int *var, const char *page, size_t count)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR, __CONV) \
-static ssize_t __FUNC(elevator_t *e, char *page) \
+static ssize_t __FUNC(struct elevator_queue *e, char *page) \
 { \
 	struct deadline_data *dd = e->elevator_data; \
 	int __data = __VAR; \
@@ -403,7 +403,7 @@ SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count) \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
 { \
 	struct deadline_data *dd = e->elevator_data; \
 	int __data; \
diff --git a/block/elevator.c b/block/elevator.c
index 86836dd179c0..98259eda0ef6 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -65,7 +65,7 @@ DEFINE_TRACE(block_rq_issue);
 static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 {
 	struct request_queue *q = rq->q;
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_allow_merge_fn)
 		return e->ops->elevator_allow_merge_fn(q, rq, bio);
@@ -208,13 +208,13 @@ __setup("elevator=", elevator_setup);
 
 static struct kobj_type elv_ktype;
 
-static elevator_t *elevator_alloc(struct request_queue *q,
+static struct elevator_queue *elevator_alloc(struct request_queue *q,
 				  struct elevator_type *e)
 {
-	elevator_t *eq;
+	struct elevator_queue *eq;
 	int i;
 
-	eq = kmalloc_node(sizeof(elevator_t), GFP_KERNEL | __GFP_ZERO, q->node);
+	eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node);
 	if (unlikely(!eq))
 		goto err;
 
@@ -240,8 +240,9 @@ err:
 
 static void elevator_release(struct kobject *kobj)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
+	struct elevator_queue *e;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	elevator_put(e->elevator_type);
 	kfree(e->hash);
 	kfree(e);
@@ -297,7 +298,7 @@ int elevator_init(struct request_queue *q, char *name)
 }
 EXPORT_SYMBOL(elevator_init);
 
-void elevator_exit(elevator_t *e)
+void elevator_exit(struct elevator_queue *e)
 {
 	mutex_lock(&e->sysfs_lock);
 	if (e->ops->elevator_exit_fn)
@@ -311,7 +312,7 @@ EXPORT_SYMBOL(elevator_exit);
 
 static void elv_activate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_activate_req_fn)
 		e->ops->elevator_activate_req_fn(q, rq);
@@ -319,7 +320,7 @@ static void elv_activate_rq(struct request_queue *q, struct request *rq)
 
 static void elv_deactivate_rq(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_deactivate_req_fn)
 		e->ops->elevator_deactivate_req_fn(q, rq);
@@ -338,7 +339,7 @@ static void elv_rqhash_del(struct request_queue *q, struct request *rq)
 
 static void elv_rqhash_add(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	BUG_ON(ELV_ON_HASH(rq));
 	hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]);
@@ -352,7 +353,7 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
 
 static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)];
 	struct hlist_node *entry, *next;
 	struct request *rq;
@@ -494,7 +495,7 @@ EXPORT_SYMBOL(elv_dispatch_add_tail);
 
 int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct request *__rq;
 	int ret;
 
@@ -529,7 +530,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 
 void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq, type);
@@ -543,7 +544,7 @@ void elv_merged_request(struct request_queue *q, struct request *rq, int type)
 void elv_merge_requests(struct request_queue *q, struct request *rq,
 			     struct request *next)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
@@ -755,14 +756,6 @@ struct request *elv_next_request(struct request_queue *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * Kill the empty barrier place holder, the driver must
-		 * not ever see it.
-		 */
-		if (blk_empty_barrier(rq)) {
-			__blk_end_request(rq, 0, blk_rq_bytes(rq));
-			continue;
-		}
 		if (!(rq->cmd_flags & REQ_STARTED)) {
 			/*
 			 * This is the first time the device driver
@@ -854,7 +847,7 @@ void elv_dequeue_request(struct request_queue *q, struct request *rq)
 
 int elv_queue_empty(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (!list_empty(&q->queue_head))
 		return 0;
@@ -868,7 +861,7 @@ EXPORT_SYMBOL(elv_queue_empty);
 
 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_latter_req_fn)
 		return e->ops->elevator_latter_req_fn(q, rq);
@@ -877,7 +870,7 @@ struct request *elv_latter_request(struct request_queue *q, struct request *rq)
 
 struct request *elv_former_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_former_req_fn)
 		return e->ops->elevator_former_req_fn(q, rq);
@@ -886,7 +879,7 @@ struct request *elv_former_request(struct request_queue *q, struct request *rq)
 
 int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
@@ -897,7 +890,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 
 void elv_put_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_put_req_fn)
 		e->ops->elevator_put_req_fn(rq);
@@ -905,7 +898,7 @@ void elv_put_request(struct request_queue *q, struct request *rq)
 
 int elv_may_queue(struct request_queue *q, int rw)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	if (e->ops->elevator_may_queue_fn)
 		return e->ops->elevator_may_queue_fn(q, rw);
@@ -928,7 +921,7 @@ EXPORT_SYMBOL(elv_abort_queue);
 
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 
 	/*
 	 * request is released from the driver, io must be done
@@ -944,10 +937,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 	 * drained for flush sequence.
 	 */
 	if (unlikely(q->ordseq)) {
-		struct request *first_rq = list_entry_rq(q->queue_head.next);
-		if (q->in_flight == 0 &&
+		struct request *next = NULL;
+
+		if (!list_empty(&q->queue_head))
+			next = list_entry_rq(q->queue_head.next);
+
+		if (!q->in_flight &&
 		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
-		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
 			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
 			blk_start_queueing(q);
 		}
@@ -959,13 +956,14 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 static ssize_t
 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->show)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->show(e, page) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -976,13 +974,14 @@ static ssize_t
 elv_attr_store(struct kobject *kobj, struct attribute *attr,
 	       const char *page, size_t length)
 {
-	elevator_t *e = container_of(kobj, elevator_t, kobj);
 	struct elv_fs_entry *entry = to_elv(attr);
+	struct elevator_queue *e;
 	ssize_t error;
 
 	if (!entry->store)
 		return -EIO;
 
+	e = container_of(kobj, struct elevator_queue, kobj);
 	mutex_lock(&e->sysfs_lock);
 	error = e->ops ? entry->store(e, page, length) : -ENOENT;
 	mutex_unlock(&e->sysfs_lock);
@@ -1001,7 +1000,7 @@ static struct kobj_type elv_ktype = {
 
 int elv_register_queue(struct request_queue *q)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	int error;
 
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
@@ -1019,7 +1018,7 @@ int elv_register_queue(struct request_queue *q)
 	return error;
 }
 
-static void __elv_unregister_queue(elevator_t *e)
+static void __elv_unregister_queue(struct elevator_queue *e)
 {
 	kobject_uevent(&e->kobj, KOBJ_REMOVE);
 	kobject_del(&e->kobj);
@@ -1082,7 +1081,7 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  */
 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
-	elevator_t *old_elevator, *e;
+	struct elevator_queue *old_elevator, *e;
 	void *data;
 
 	/*
@@ -1188,7 +1187,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
 
 ssize_t elv_iosched_show(struct request_queue *q, char *name)
 {
-	elevator_t *e = q->elevator;
+	struct elevator_queue *e = q->elevator;
 	struct elevator_type *elv = e->elevator_type;
 	struct elevator_type *__e;
 	int len = 0;
diff --git a/block/genhd.c b/block/genhd.c
index 2f7feda61e35..d84a7df1e2a0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -181,6 +181,12 @@ void disk_part_iter_exit(struct disk_part_iter *piter)
 }
 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 
+static inline int sector_in_part(struct hd_struct *part, sector_t sector)
+{
+	return part->start_sect <= sector &&
+		sector < part->start_sect + part->nr_sects;
+}
+
 /**
  * disk_map_sector_rcu - map sector to partition
  * @disk: gendisk of interest
@@ -199,16 +205,22 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 {
 	struct disk_part_tbl *ptbl;
+	struct hd_struct *part;
 	int i;
 
 	ptbl = rcu_dereference(disk->part_tbl);
 
+	part = rcu_dereference(ptbl->last_lookup);
+	if (part && sector_in_part(part, sector))
+		return part;
+
 	for (i = 1; i < ptbl->len; i++) {
-		struct hd_struct *part = rcu_dereference(ptbl->part[i]);
+		part = rcu_dereference(ptbl->part[i]);
 
-		if (part && part->start_sect <= sector &&
-		    sector < part->start_sect + part->nr_sects)
+		if (part && sector_in_part(part, sector)) {
+			rcu_assign_pointer(ptbl->last_lookup, part);
 			return part;
+		}
 	}
 	return &disk->part0;
 }
@@ -888,8 +900,11 @@ static void disk_replace_part_tbl(struct gendisk *disk,
 	struct disk_part_tbl *old_ptbl = disk->part_tbl;
 
 	rcu_assign_pointer(disk->part_tbl, new_ptbl);
-	if (old_ptbl)
+
+	if (old_ptbl) {
+		rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 		call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
+	}
 }
 
 /**
diff --git a/block/ioctl.c b/block/ioctl.c
index d03985b04d67..0f22e629b13c 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -323,9 +323,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		bdi = blk_get_backing_dev_info(bdev);
 		if (bdi == NULL)
 			return -ENOTTY;
-		lock_kernel();
 		bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
-		unlock_kernel();
 		return 0;
 	case BLKBSZSET:
 		/* set the logical block size */
diff --git a/block/noop-iosched.c b/block/noop-iosched.c
index c23e02969650..3a0d369d08c7 100644
--- a/block/noop-iosched.c
+++ b/block/noop-iosched.c
@@ -76,7 +76,7 @@ static void *noop_init_queue(struct request_queue *q)
 	return nd;
 }
 
-static void noop_exit_queue(elevator_t *e)
+static void noop_exit_queue(struct elevator_queue *e)
 {
 	struct noop_data *nd = e->elevator_data;
 
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index d0bb92cbefb9..ee9c67d7e1be 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -60,7 +60,7 @@ static int scsi_get_bus(struct request_queue *q, int __user *p)
 
 static int sg_get_timeout(struct request_queue *q)
 {
-	return q->sg_timeout / (HZ / USER_HZ);
+	return jiffies_to_clock_t(q->sg_timeout);
 }
 
 static int sg_set_timeout(struct request_queue *q, int __user *p)
@@ -68,7 +68,7 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
 	int timeout, err = get_user(timeout, p);
 
 	if (!err)
-		q->sg_timeout = timeout * (HZ / USER_HZ);
+		q->sg_timeout = clock_t_to_jiffies(timeout);
 
 	return err;
 }
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 9f7c543cc04b..01e69383d9c0 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -164,7 +164,7 @@ static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
164 164
165static int cciss_revalidate(struct gendisk *disk); 165static int cciss_revalidate(struct gendisk *disk);
166static int rebuild_lun_table(ctlr_info_t *h, int first_time); 166static int rebuild_lun_table(ctlr_info_t *h, int first_time);
167static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, 167static int deregister_disk(ctlr_info_t *h, int drv_index,
168 int clear_all); 168 int clear_all);
169 169
170static void cciss_read_capacity(int ctlr, int logvol, int withirq, 170static void cciss_read_capacity(int ctlr, int logvol, int withirq,
@@ -215,31 +215,17 @@ static struct block_device_operations cciss_fops = {
215/* 215/*
216 * Enqueuing and dequeuing functions for cmdlists. 216 * Enqueuing and dequeuing functions for cmdlists.
217 */ 217 */
218static inline void addQ(CommandList_struct **Qptr, CommandList_struct *c) 218static inline void addQ(struct hlist_head *list, CommandList_struct *c)
219{ 219{
220 if (*Qptr == NULL) { 220 hlist_add_head(&c->list, list);
221 *Qptr = c;
222 c->next = c->prev = c;
223 } else {
224 c->prev = (*Qptr)->prev;
225 c->next = (*Qptr);
226 (*Qptr)->prev->next = c;
227 (*Qptr)->prev = c;
228 }
229} 221}
230 222
231static inline CommandList_struct *removeQ(CommandList_struct **Qptr, 223static inline void removeQ(CommandList_struct *c)
232 CommandList_struct *c)
233{ 224{
234 if (c && c->next != c) { 225 if (WARN_ON(hlist_unhashed(&c->list)))
235 if (*Qptr == c) 226 return;
236 *Qptr = c->next; 227
237 c->prev->next = c->next; 228 hlist_del_init(&c->list);
238 c->next->prev = c->prev;
239 } else {
240 *Qptr = NULL;
241 }
242 return c;
243} 229}
244 230
245#include "cciss_scsi.c" /* For SCSI tape support */ 231#include "cciss_scsi.c" /* For SCSI tape support */
@@ -506,6 +492,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
506 c->cmdindex = i; 492 c->cmdindex = i;
507 } 493 }
508 494
495 INIT_HLIST_NODE(&c->list);
509 c->busaddr = (__u32) cmd_dma_handle; 496 c->busaddr = (__u32) cmd_dma_handle;
510 temp64.val = (__u64) err_dma_handle; 497 temp64.val = (__u64) err_dma_handle;
511 c->ErrDesc.Addr.lower = temp64.val32.lower; 498 c->ErrDesc.Addr.lower = temp64.val32.lower;
@@ -1492,8 +1479,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
1492 * which keeps the interrupt handler from starting 1479 * which keeps the interrupt handler from starting
1493 * the queue. 1480 * the queue.
1494 */ 1481 */
1495 ret = deregister_disk(h->gendisk[drv_index], 1482 ret = deregister_disk(h, drv_index, 0);
1496 &h->drv[drv_index], 0);
1497 h->drv[drv_index].busy_configuring = 0; 1483 h->drv[drv_index].busy_configuring = 0;
1498 } 1484 }
1499 1485
@@ -1711,8 +1697,7 @@ static int rebuild_lun_table(ctlr_info_t *h, int first_time)
1711 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags); 1697 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1712 h->drv[i].busy_configuring = 1; 1698 h->drv[i].busy_configuring = 1;
1713 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags); 1699 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1714 return_code = deregister_disk(h->gendisk[i], 1700 return_code = deregister_disk(h, i, 1);
1715 &h->drv[i], 1);
1716 h->drv[i].busy_configuring = 0; 1701 h->drv[i].busy_configuring = 0;
1717 } 1702 }
1718 } 1703 }
@@ -1782,15 +1767,19 @@ mem_msg:
1782 * the highest_lun should be left unchanged and the LunID 1767 * the highest_lun should be left unchanged and the LunID
1783 * should not be cleared. 1768 * should not be cleared.
1784*/ 1769*/
1785static int deregister_disk(struct gendisk *disk, drive_info_struct *drv, 1770static int deregister_disk(ctlr_info_t *h, int drv_index,
1786 int clear_all) 1771 int clear_all)
1787{ 1772{
1788 int i; 1773 int i;
1789 ctlr_info_t *h = get_host(disk); 1774 struct gendisk *disk;
1775 drive_info_struct *drv;
1790 1776
1791 if (!capable(CAP_SYS_RAWIO)) 1777 if (!capable(CAP_SYS_RAWIO))
1792 return -EPERM; 1778 return -EPERM;
1793 1779
1780 drv = &h->drv[drv_index];
1781 disk = h->gendisk[drv_index];
1782
1794 /* make sure logical volume is NOT in use */ 1783 /* make sure logical volume is NOT in use */
1795 if (clear_all || (h->gendisk[0] == disk)) { 1784 if (clear_all || (h->gendisk[0] == disk)) {
1796 if (drv->usage_count > 1) 1785 if (drv->usage_count > 1)
@@ -2548,7 +2537,8 @@ static void start_io(ctlr_info_t *h)
2548{ 2537{
2549 CommandList_struct *c; 2538 CommandList_struct *c;
2550 2539
2551 while ((c = h->reqQ) != NULL) { 2540 while (!hlist_empty(&h->reqQ)) {
2541 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
2552 /* can't do anything if fifo is full */ 2542 /* can't do anything if fifo is full */
2553 if ((h->access.fifo_full(h))) { 2543 if ((h->access.fifo_full(h))) {
2554 printk(KERN_WARNING "cciss: fifo full\n"); 2544 printk(KERN_WARNING "cciss: fifo full\n");
@@ -2556,14 +2546,14 @@ static void start_io(ctlr_info_t *h)
2556 } 2546 }
2557 2547
2558 /* Get the first entry from the Request Q */ 2548 /* Get the first entry from the Request Q */
2559 removeQ(&(h->reqQ), c); 2549 removeQ(c);
2560 h->Qdepth--; 2550 h->Qdepth--;
2561 2551
2562 /* Tell the controller execute command */ 2552 /* Tell the controller execute command */
2563 h->access.submit_command(h, c); 2553 h->access.submit_command(h, c);
2564 2554
2565 /* Put job onto the completed Q */ 2555 /* Put job onto the completed Q */
2566 addQ(&(h->cmpQ), c); 2556 addQ(&h->cmpQ, c);
2567 } 2557 }
2568} 2558}
2569 2559
@@ -2576,7 +2566,7 @@ static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2576 memset(c->err_info, 0, sizeof(ErrorInfo_struct)); 2566 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2577 2567
2578 /* add it to software queue and then send it to the controller */ 2568 /* add it to software queue and then send it to the controller */
2579 addQ(&(h->reqQ), c); 2569 addQ(&h->reqQ, c);
2580 h->Qdepth++; 2570 h->Qdepth++;
2581 if (h->Qdepth > h->maxQsinceinit) 2571 if (h->Qdepth > h->maxQsinceinit)
2582 h->maxQsinceinit = h->Qdepth; 2572 h->maxQsinceinit = h->Qdepth;
@@ -2897,7 +2887,7 @@ static void do_cciss_request(struct request_queue *q)
2897 2887
2898 spin_lock_irq(q->queue_lock); 2888 spin_lock_irq(q->queue_lock);
2899 2889
2900 addQ(&(h->reqQ), c); 2890 addQ(&h->reqQ, c);
2901 h->Qdepth++; 2891 h->Qdepth++;
2902 if (h->Qdepth > h->maxQsinceinit) 2892 if (h->Qdepth > h->maxQsinceinit)
2903 h->maxQsinceinit = h->Qdepth; 2893 h->maxQsinceinit = h->Qdepth;
@@ -2985,16 +2975,12 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
2985 a = c->busaddr; 2975 a = c->busaddr;
2986 2976
2987 } else { 2977 } else {
2978 struct hlist_node *tmp;
2979
2988 a &= ~3; 2980 a &= ~3;
2989 if ((c = h->cmpQ) == NULL) { 2981 c = NULL;
2990 printk(KERN_WARNING 2982 hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
2991 "cciss: Completion of %08x ignored\n", 2983 if (c->busaddr == a)
2992 a1);
2993 continue;
2994 }
2995 while (c->busaddr != a) {
2996 c = c->next;
2997 if (c == h->cmpQ)
2998 break; 2984 break;
2999 } 2985 }
3000 } 2986 }
@@ -3002,8 +2988,8 @@ static irqreturn_t do_cciss_intr(int irq, void *dev_id)
3002 * If we've found the command, take it off the 2988 * If we've found the command, take it off the
3003 * completion Q and free it 2989 * completion Q and free it
3004 */ 2990 */
3005 if (c->busaddr == a) { 2991 if (c && c->busaddr == a) {
3006 removeQ(&h->cmpQ, c); 2992 removeQ(c);
3007 if (c->cmd_type == CMD_RWREQ) { 2993 if (c->cmd_type == CMD_RWREQ) {
3008 complete_command(h, c, 0); 2994 complete_command(h, c, 0);
3009 } else if (c->cmd_type == CMD_IOCTL_PEND) { 2995 } else if (c->cmd_type == CMD_IOCTL_PEND) {
@@ -3423,6 +3409,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
3423 return -1; 3409 return -1;
3424 3410
3425 hba[i]->busy_initializing = 1; 3411 hba[i]->busy_initializing = 1;
3412 INIT_HLIST_HEAD(&hba[i]->cmpQ);
3413 INIT_HLIST_HEAD(&hba[i]->reqQ);
3426 3414
3427 if (cciss_pci_init(hba[i], pdev) != 0) 3415 if (cciss_pci_init(hba[i], pdev) != 0)
3428 goto clean1; 3416 goto clean1;
@@ -3730,15 +3718,17 @@ static void fail_all_cmds(unsigned long ctlr)
3730 pci_disable_device(h->pdev); /* Make sure it is really dead. */ 3718 pci_disable_device(h->pdev); /* Make sure it is really dead. */
3731 3719
3732 /* move everything off the request queue onto the completed queue */ 3720 /* move everything off the request queue onto the completed queue */
3733 while ((c = h->reqQ) != NULL) { 3721 while (!hlist_empty(&h->reqQ)) {
3734 removeQ(&(h->reqQ), c); 3722 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
3723 removeQ(c);
3735 h->Qdepth--; 3724 h->Qdepth--;
3736 addQ(&(h->cmpQ), c); 3725 addQ(&h->cmpQ, c);
3737 } 3726 }
3738 3727
3739 /* Now, fail everything on the completed queue with a HW error */ 3728 /* Now, fail everything on the completed queue with a HW error */
3740 while ((c = h->cmpQ) != NULL) { 3729 while (!hlist_empty(&h->cmpQ)) {
3741 removeQ(&h->cmpQ, c); 3730 c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
3731 removeQ(c);
3742 c->err_info->CommandStatus = CMD_HARDWARE_ERR; 3732 c->err_info->CommandStatus = CMD_HARDWARE_ERR;
3743 if (c->cmd_type == CMD_RWREQ) { 3733 if (c->cmd_type == CMD_RWREQ) {
3744 complete_command(h, c, 0); 3734 complete_command(h, c, 0);
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 24a7efa993ab..15e2b84734e3 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -89,8 +89,8 @@ struct ctlr_info
89 struct access_method access; 89 struct access_method access;
90 90
91 /* queue and queue Info */ 91 /* queue and queue Info */
92 CommandList_struct *reqQ; 92 struct hlist_head reqQ;
93 CommandList_struct *cmpQ; 93 struct hlist_head cmpQ;
94 unsigned int Qdepth; 94 unsigned int Qdepth;
95 unsigned int maxQsinceinit; 95 unsigned int maxQsinceinit;
96 unsigned int maxSG; 96 unsigned int maxSG;
diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index 43bf5593b59b..24e22dea1a99 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h
@@ -265,8 +265,7 @@ typedef struct _CommandList_struct {
265 int ctlr; 265 int ctlr;
266 int cmd_type; 266 int cmd_type;
267 long cmdindex; 267 long cmdindex;
268 struct _CommandList_struct *prev; 268 struct hlist_node list;
269 struct _CommandList_struct *next;
270 struct request * rq; 269 struct request * rq;
271 struct completion *waiting; 270 struct completion *waiting;
272 int retry_count; 271 int retry_count;
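
The cciss changes above replace the driver's hand-rolled circular prev/next command queues (addQ()/removeQ() walking CommandList_struct pointers) with the generic hlist primitives, so the queue heads live in the controller as struct hlist_head and each command carries a struct hlist_node. A minimal sketch of the same pattern, not driver code (struct demo_cmd and demo_queue are hypothetical), assuming <linux/list.h>:

#include <linux/list.h>

struct demo_cmd {
	int tag;
	struct hlist_node list;		/* replaces the old prev/next pointers */
};

static HLIST_HEAD(demo_queue);

static void demo_enqueue(struct demo_cmd *c)
{
	hlist_add_head(&c->list, &demo_queue);
}

static struct demo_cmd *demo_dequeue(void)
{
	struct demo_cmd *c;

	if (hlist_empty(&demo_queue))
		return NULL;

	c = hlist_entry(demo_queue.first, struct demo_cmd, list);
	hlist_del_init(&c->list);	/* leaves the node "unhashed", which is
					 * what the new removeQ() warns about */
	return c;
}

The gain is that the empty-queue and single-entry cases need no special casing, and a stale node is detectable with hlist_unhashed() instead of by comparing next pointers.
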
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index fb06ed659212..edbaac6c0573 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -624,20 +624,38 @@ static int loop_switch(struct loop_device *lo, struct file *file)
624} 624}
625 625
626/* 626/*
627 * Helper to flush the IOs in loop, but keeping loop thread running
628 */
629static int loop_flush(struct loop_device *lo)
630{
631 /* loop not yet configured, no running thread, nothing to flush */
632 if (!lo->lo_thread)
633 return 0;
634
635 return loop_switch(lo, NULL);
636}
637
638/*
627 * Do the actual switch; called from the BIO completion routine 639 * Do the actual switch; called from the BIO completion routine
628 */ 640 */
629static void do_loop_switch(struct loop_device *lo, struct switch_request *p) 641static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
630{ 642{
631 struct file *file = p->file; 643 struct file *file = p->file;
632 struct file *old_file = lo->lo_backing_file; 644 struct file *old_file = lo->lo_backing_file;
633 struct address_space *mapping = file->f_mapping; 645 struct address_space *mapping;
646
647 /* if no new file, only flush of queued bios requested */
648 if (!file)
649 goto out;
634 650
651 mapping = file->f_mapping;
635 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); 652 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
636 lo->lo_backing_file = file; 653 lo->lo_backing_file = file;
637 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ? 654 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
638 mapping->host->i_bdev->bd_block_size : PAGE_SIZE; 655 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
639 lo->old_gfp_mask = mapping_gfp_mask(mapping); 656 lo->old_gfp_mask = mapping_gfp_mask(mapping);
640 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 657 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
658out:
641 complete(&p->wait); 659 complete(&p->wait);
642} 660}
643 661
@@ -901,6 +919,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
901 919
902 kthread_stop(lo->lo_thread); 920 kthread_stop(lo->lo_thread);
903 921
922 lo->lo_queue->unplug_fn = NULL;
904 lo->lo_backing_file = NULL; 923 lo->lo_backing_file = NULL;
905 924
906 loop_release_xfer(lo); 925 loop_release_xfer(lo);
@@ -1345,11 +1364,25 @@ static int lo_release(struct gendisk *disk, fmode_t mode)
1345 struct loop_device *lo = disk->private_data; 1364 struct loop_device *lo = disk->private_data;
1346 1365
1347 mutex_lock(&lo->lo_ctl_mutex); 1366 mutex_lock(&lo->lo_ctl_mutex);
1348 --lo->lo_refcnt;
1349 1367
1350 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) && !lo->lo_refcnt) 1368 if (--lo->lo_refcnt)
1369 goto out;
1370
1371 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
1372 /*
1373 * In autoclear mode, stop the loop thread
1374 * and remove configuration after last close.
1375 */
1351 loop_clr_fd(lo, NULL); 1376 loop_clr_fd(lo, NULL);
1377 } else {
1378 /*
1379 * Otherwise keep thread (if running) and config,
1380 * but flush possible ongoing bios in thread.
1381 */
1382 loop_flush(lo);
1383 }
1352 1384
1385out:
1353 mutex_unlock(&lo->lo_ctl_mutex); 1386 mutex_unlock(&lo->lo_ctl_mutex);
1354 1387
1355 return 0; 1388 return 0;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index d3a91cacee8c..7bcc1d8bc967 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -722,7 +722,6 @@ static int __init nbd_init(void)
722 722
723 for (i = 0; i < nbds_max; i++) { 723 for (i = 0; i < nbds_max; i++) {
724 struct gendisk *disk = alloc_disk(1 << part_shift); 724 struct gendisk *disk = alloc_disk(1 << part_shift);
725 elevator_t *old_e;
726 if (!disk) 725 if (!disk)
727 goto out; 726 goto out;
728 nbd_dev[i].disk = disk; 727 nbd_dev[i].disk = disk;
@@ -736,11 +735,10 @@ static int __init nbd_init(void)
736 put_disk(disk); 735 put_disk(disk);
737 goto out; 736 goto out;
738 } 737 }
739 old_e = disk->queue->elevator; 738 /*
740 if (elevator_init(disk->queue, "deadline") == 0 || 739 * Tell the block layer that we are not a rotational device
741 elevator_init(disk->queue, "noop") == 0) { 740 */
742 elevator_exit(old_e); 741 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
743 }
744 } 742 }
745 743
746 if (register_blkdev(NBD_MAJOR, "nbd")) { 744 if (register_blkdev(NBD_MAJOR, "nbd")) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 85d79a02d487..f151592ecf73 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -237,6 +237,8 @@ static int virtblk_probe(struct virtio_device *vdev)
237 goto out_put_disk; 237 goto out_put_disk;
238 } 238 }
239 239
240 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, vblk->disk->queue);
241
240 if (index < 26) { 242 if (index < 26) {
241 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26); 243 sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
242 } else if (index < (26 + 1) * 26) { 244 } else if (index < (26 + 1) * 26) {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2d19f0cc47f2..918ef725de41 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -338,18 +338,12 @@ wait:
338static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size) 338static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
339{ 339{
340 struct request_queue *rq; 340 struct request_queue *rq;
341 elevator_t *old_e;
342 341
343 rq = blk_init_queue(do_blkif_request, &blkif_io_lock); 342 rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
344 if (rq == NULL) 343 if (rq == NULL)
345 return -1; 344 return -1;
346 345
347 old_e = rq->elevator; 346 queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
348 if (IS_ERR_VALUE(elevator_init(rq, "noop")))
349 printk(KERN_WARNING
350 "blkfront: Switch elevator failed, use default\n");
351 else
352 elevator_exit(old_e);
353 347
354 /* Hard sector size and max sectors impersonate the equiv. hardware. */ 348 /* Hard sector size and max sectors impersonate the equiv. hardware. */
355 blk_queue_hardsect_size(rq, sector_size); 349 blk_queue_hardsect_size(rq, sector_size);
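
Both the nbd and xen-blkfront hunks above stop forcing a specific elevator (the old save-rq->elevator, elevator_init(), elevator_exit() dance shown in the removed lines) and instead just tag the queue, leaving scheduler choice to the block layer. A minimal probe-time sketch, not taken from either driver (demo_setup_queue() is hypothetical), assuming the queue flags used in the hunks above:

#include <linux/blkdev.h>

static void demo_setup_queue(struct request_queue *q, bool virt_backed)
{
	/* no seek penalty: lets the I/O scheduler skip rotational heuristics */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);

	/* paravirtualized backend, the hint virtio_blk and xen-blkfront set */
	if (virt_backed)
		queue_flag_set_unlocked(QUEUE_FLAG_VIRT, q);
}
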
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 7d2e91cccb13..cceace61ef28 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1712,29 +1712,30 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai)
1712 return 0; 1712 return 0;
1713} 1713}
1714 1714
1715static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s) 1715static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s,
1716 struct packet_command *cgc)
1716{ 1717{
1717 unsigned char buf[21], *base; 1718 unsigned char buf[21], *base;
1718 struct dvd_layer *layer; 1719 struct dvd_layer *layer;
1719 struct packet_command cgc;
1720 struct cdrom_device_ops *cdo = cdi->ops; 1720 struct cdrom_device_ops *cdo = cdi->ops;
1721 int ret, layer_num = s->physical.layer_num; 1721 int ret, layer_num = s->physical.layer_num;
1722 1722
1723 if (layer_num >= DVD_LAYERS) 1723 if (layer_num >= DVD_LAYERS)
1724 return -EINVAL; 1724 return -EINVAL;
1725 1725
1726 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); 1726 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
1727 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1727 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1728 cgc.cmd[6] = layer_num; 1728 cgc->cmd[6] = layer_num;
1729 cgc.cmd[7] = s->type; 1729 cgc->cmd[7] = s->type;
1730 cgc.cmd[9] = cgc.buflen & 0xff; 1730 cgc->cmd[9] = cgc->buflen & 0xff;
1731 1731
1732 /* 1732 /*
1733 * refrain from reporting errors on non-existing layers (mainly) 1733 * refrain from reporting errors on non-existing layers (mainly)
1734 */ 1734 */
1735 cgc.quiet = 1; 1735 cgc->quiet = 1;
1736 1736
1737 if ((ret = cdo->generic_packet(cdi, &cgc))) 1737 ret = cdo->generic_packet(cdi, cgc);
1738 if (ret)
1738 return ret; 1739 return ret;
1739 1740
1740 base = &buf[4]; 1741 base = &buf[4];
@@ -1762,21 +1763,22 @@ static int dvd_read_physical(struct cdrom_device_info *cdi, dvd_struct *s)
1762 return 0; 1763 return 0;
1763} 1764}
1764 1765
1765static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s) 1766static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s,
1767 struct packet_command *cgc)
1766{ 1768{
1767 int ret; 1769 int ret;
1768 u_char buf[8]; 1770 u_char buf[8];
1769 struct packet_command cgc;
1770 struct cdrom_device_ops *cdo = cdi->ops; 1771 struct cdrom_device_ops *cdo = cdi->ops;
1771 1772
1772 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); 1773 init_cdrom_command(cgc, buf, sizeof(buf), CGC_DATA_READ);
1773 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1774 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1774 cgc.cmd[6] = s->copyright.layer_num; 1775 cgc->cmd[6] = s->copyright.layer_num;
1775 cgc.cmd[7] = s->type; 1776 cgc->cmd[7] = s->type;
1776 cgc.cmd[8] = cgc.buflen >> 8; 1777 cgc->cmd[8] = cgc->buflen >> 8;
1777 cgc.cmd[9] = cgc.buflen & 0xff; 1778 cgc->cmd[9] = cgc->buflen & 0xff;
1778 1779
1779 if ((ret = cdo->generic_packet(cdi, &cgc))) 1780 ret = cdo->generic_packet(cdi, cgc);
1781 if (ret)
1780 return ret; 1782 return ret;
1781 1783
1782 s->copyright.cpst = buf[4]; 1784 s->copyright.cpst = buf[4];
@@ -1785,79 +1787,89 @@ static int dvd_read_copyright(struct cdrom_device_info *cdi, dvd_struct *s)
1785 return 0; 1787 return 0;
1786} 1788}
1787 1789
1788static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s) 1790static int dvd_read_disckey(struct cdrom_device_info *cdi, dvd_struct *s,
1791 struct packet_command *cgc)
1789{ 1792{
1790 int ret, size; 1793 int ret, size;
1791 u_char *buf; 1794 u_char *buf;
1792 struct packet_command cgc;
1793 struct cdrom_device_ops *cdo = cdi->ops; 1795 struct cdrom_device_ops *cdo = cdi->ops;
1794 1796
1795 size = sizeof(s->disckey.value) + 4; 1797 size = sizeof(s->disckey.value) + 4;
1796 1798
1797 if ((buf = kmalloc(size, GFP_KERNEL)) == NULL) 1799 buf = kmalloc(size, GFP_KERNEL);
1800 if (!buf)
1798 return -ENOMEM; 1801 return -ENOMEM;
1799 1802
1800 init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); 1803 init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
1801 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1804 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1802 cgc.cmd[7] = s->type; 1805 cgc->cmd[7] = s->type;
1803 cgc.cmd[8] = size >> 8; 1806 cgc->cmd[8] = size >> 8;
1804 cgc.cmd[9] = size & 0xff; 1807 cgc->cmd[9] = size & 0xff;
1805 cgc.cmd[10] = s->disckey.agid << 6; 1808 cgc->cmd[10] = s->disckey.agid << 6;
1806 1809
1807 if (!(ret = cdo->generic_packet(cdi, &cgc))) 1810 ret = cdo->generic_packet(cdi, cgc);
1811 if (!ret)
1808 memcpy(s->disckey.value, &buf[4], sizeof(s->disckey.value)); 1812 memcpy(s->disckey.value, &buf[4], sizeof(s->disckey.value));
1809 1813
1810 kfree(buf); 1814 kfree(buf);
1811 return ret; 1815 return ret;
1812} 1816}
1813 1817
1814static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s) 1818static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s,
1819 struct packet_command *cgc)
1815{ 1820{
1816 int ret; 1821 int ret, size = 4 + 188;
1817 u_char buf[4 + 188]; 1822 u_char *buf;
1818 struct packet_command cgc;
1819 struct cdrom_device_ops *cdo = cdi->ops; 1823 struct cdrom_device_ops *cdo = cdi->ops;
1820 1824
1821 init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); 1825 buf = kmalloc(size, GFP_KERNEL);
1822 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1826 if (!buf)
1823 cgc.cmd[7] = s->type; 1827 return -ENOMEM;
1824 cgc.cmd[9] = cgc.buflen & 0xff;
1825 1828
1826 if ((ret = cdo->generic_packet(cdi, &cgc))) 1829 init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
1827 return ret; 1830 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1831 cgc->cmd[7] = s->type;
1832 cgc->cmd[9] = cgc->buflen & 0xff;
1833
1834 ret = cdo->generic_packet(cdi, cgc);
1835 if (ret)
1836 goto out;
1828 1837
1829 s->bca.len = buf[0] << 8 | buf[1]; 1838 s->bca.len = buf[0] << 8 | buf[1];
1830 if (s->bca.len < 12 || s->bca.len > 188) { 1839 if (s->bca.len < 12 || s->bca.len > 188) {
1831 cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len); 1840 cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len);
1832 return -EIO; 1841 ret = -EIO;
1842 goto out;
1833 } 1843 }
1834 memcpy(s->bca.value, &buf[4], s->bca.len); 1844 memcpy(s->bca.value, &buf[4], s->bca.len);
1835 1845 ret = 0;
1836 return 0; 1846out:
1847 kfree(buf);
1848 return ret;
1837} 1849}
1838 1850
1839static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s) 1851static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
1852 struct packet_command *cgc)
1840{ 1853{
1841 int ret = 0, size; 1854 int ret = 0, size;
1842 u_char *buf; 1855 u_char *buf;
1843 struct packet_command cgc;
1844 struct cdrom_device_ops *cdo = cdi->ops; 1856 struct cdrom_device_ops *cdo = cdi->ops;
1845 1857
1846 size = sizeof(s->manufact.value) + 4; 1858 size = sizeof(s->manufact.value) + 4;
1847 1859
1848 if ((buf = kmalloc(size, GFP_KERNEL)) == NULL) 1860 buf = kmalloc(size, GFP_KERNEL);
1861 if (!buf)
1849 return -ENOMEM; 1862 return -ENOMEM;
1850 1863
1851 init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); 1864 init_cdrom_command(cgc, buf, size, CGC_DATA_READ);
1852 cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE; 1865 cgc->cmd[0] = GPCMD_READ_DVD_STRUCTURE;
1853 cgc.cmd[7] = s->type; 1866 cgc->cmd[7] = s->type;
1854 cgc.cmd[8] = size >> 8; 1867 cgc->cmd[8] = size >> 8;
1855 cgc.cmd[9] = size & 0xff; 1868 cgc->cmd[9] = size & 0xff;
1856 1869
1857 if ((ret = cdo->generic_packet(cdi, &cgc))) { 1870 ret = cdo->generic_packet(cdi, cgc);
1858 kfree(buf); 1871 if (ret)
1859 return ret; 1872 goto out;
1860 }
1861 1873
1862 s->manufact.len = buf[0] << 8 | buf[1]; 1874 s->manufact.len = buf[0] << 8 | buf[1];
1863 if (s->manufact.len < 0 || s->manufact.len > 2048) { 1875 if (s->manufact.len < 0 || s->manufact.len > 2048) {
@@ -1868,27 +1880,29 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s)
1868 memcpy(s->manufact.value, &buf[4], s->manufact.len); 1880 memcpy(s->manufact.value, &buf[4], s->manufact.len);
1869 } 1881 }
1870 1882
1883out:
1871 kfree(buf); 1884 kfree(buf);
1872 return ret; 1885 return ret;
1873} 1886}
1874 1887
1875static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s) 1888static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s,
1889 struct packet_command *cgc)
1876{ 1890{
1877 switch (s->type) { 1891 switch (s->type) {
1878 case DVD_STRUCT_PHYSICAL: 1892 case DVD_STRUCT_PHYSICAL:
1879 return dvd_read_physical(cdi, s); 1893 return dvd_read_physical(cdi, s, cgc);
1880 1894
1881 case DVD_STRUCT_COPYRIGHT: 1895 case DVD_STRUCT_COPYRIGHT:
1882 return dvd_read_copyright(cdi, s); 1896 return dvd_read_copyright(cdi, s, cgc);
1883 1897
1884 case DVD_STRUCT_DISCKEY: 1898 case DVD_STRUCT_DISCKEY:
1885 return dvd_read_disckey(cdi, s); 1899 return dvd_read_disckey(cdi, s, cgc);
1886 1900
1887 case DVD_STRUCT_BCA: 1901 case DVD_STRUCT_BCA:
1888 return dvd_read_bca(cdi, s); 1902 return dvd_read_bca(cdi, s, cgc);
1889 1903
1890 case DVD_STRUCT_MANUFACT: 1904 case DVD_STRUCT_MANUFACT:
1891 return dvd_read_manufact(cdi, s); 1905 return dvd_read_manufact(cdi, s, cgc);
1892 1906
1893 default: 1907 default:
1894 cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n", 1908 cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n",
@@ -2787,271 +2801,360 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size)
2787 return cdo->generic_packet(cdi, &cgc); 2801 return cdo->generic_packet(cdi, &cgc);
2788} 2802}
2789 2803
2790static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, 2804static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi,
2791 unsigned long arg) 2805 void __user *arg,
2792{ 2806 struct packet_command *cgc,
2793 struct cdrom_device_ops *cdo = cdi->ops; 2807 int cmd)
2794 struct packet_command cgc; 2808{
2795 struct request_sense sense; 2809 struct request_sense sense;
2796 unsigned char buffer[32]; 2810 struct cdrom_msf msf;
2797 int ret = 0; 2811 int blocksize = 0, format = 0, lba;
2798 2812 int ret;
2799 memset(&cgc, 0, sizeof(cgc));
2800 2813
2801 /* build a unified command and queue it through
2802 cdo->generic_packet() */
2803 switch (cmd) { 2814 switch (cmd) {
2804 case CDROMREADRAW: 2815 case CDROMREADRAW:
2816 blocksize = CD_FRAMESIZE_RAW;
2817 break;
2805 case CDROMREADMODE1: 2818 case CDROMREADMODE1:
2806 case CDROMREADMODE2: { 2819 blocksize = CD_FRAMESIZE;
2807 struct cdrom_msf msf; 2820 format = 2;
2808 int blocksize = 0, format = 0, lba; 2821 break;
2809 2822 case CDROMREADMODE2:
2810 switch (cmd) { 2823 blocksize = CD_FRAMESIZE_RAW0;
2811 case CDROMREADRAW: 2824 break;
2812 blocksize = CD_FRAMESIZE_RAW; 2825 }
2813 break; 2826 IOCTL_IN(arg, struct cdrom_msf, msf);
2814 case CDROMREADMODE1: 2827 lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0);
2815 blocksize = CD_FRAMESIZE; 2828 /* FIXME: we need upper bound checking, too!! */
2816 format = 2; 2829 if (lba < 0)
2817 break; 2830 return -EINVAL;
2818 case CDROMREADMODE2: 2831
2819 blocksize = CD_FRAMESIZE_RAW0; 2832 cgc->buffer = kmalloc(blocksize, GFP_KERNEL);
2820 break; 2833 if (cgc->buffer == NULL)
2821 } 2834 return -ENOMEM;
2822 IOCTL_IN(arg, struct cdrom_msf, msf); 2835
2823 lba = msf_to_lba(msf.cdmsf_min0,msf.cdmsf_sec0,msf.cdmsf_frame0); 2836 memset(&sense, 0, sizeof(sense));
2824 /* FIXME: we need upper bound checking, too!! */ 2837 cgc->sense = &sense;
2825 if (lba < 0) 2838 cgc->data_direction = CGC_DATA_READ;
2826 return -EINVAL; 2839 ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize);
2827 cgc.buffer = kmalloc(blocksize, GFP_KERNEL); 2840 if (ret && sense.sense_key == 0x05 &&
2828 if (cgc.buffer == NULL) 2841 sense.asc == 0x20 &&
2829 return -ENOMEM; 2842 sense.ascq == 0x00) {
2830 memset(&sense, 0, sizeof(sense)); 2843 /*
2831 cgc.sense = &sense; 2844 * SCSI-II devices are not required to support
2832 cgc.data_direction = CGC_DATA_READ; 2845 * READ_CD, so let's try switching block size
2833 ret = cdrom_read_block(cdi, &cgc, lba, 1, format, blocksize); 2846 */
2834 if (ret && sense.sense_key==0x05 && sense.asc==0x20 && sense.ascq==0x00) { 2847 /* FIXME: switch back again... */
2835 /* 2848 ret = cdrom_switch_blocksize(cdi, blocksize);
2836 * SCSI-II devices are not required to support 2849 if (ret)
2837 * READ_CD, so let's try switching block size 2850 goto out;
2838 */ 2851 cgc->sense = NULL;
2839 /* FIXME: switch back again... */ 2852 ret = cdrom_read_cd(cdi, cgc, lba, blocksize, 1);
2840 if ((ret = cdrom_switch_blocksize(cdi, blocksize))) { 2853 ret |= cdrom_switch_blocksize(cdi, blocksize);
2841 kfree(cgc.buffer); 2854 }
2842 return ret; 2855 if (!ret && copy_to_user(arg, cgc->buffer, blocksize))
2843 } 2856 ret = -EFAULT;
2844 cgc.sense = NULL; 2857out:
2845 ret = cdrom_read_cd(cdi, &cgc, lba, blocksize, 1); 2858 kfree(cgc->buffer);
2846 ret |= cdrom_switch_blocksize(cdi, blocksize); 2859 return ret;
2847 } 2860}
2848 if (!ret && copy_to_user((char __user *)arg, cgc.buffer, blocksize)) 2861
2849 ret = -EFAULT; 2862static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi,
2850 kfree(cgc.buffer); 2863 void __user *arg)
2864{
2865 struct cdrom_read_audio ra;
2866 int lba;
2867
2868 IOCTL_IN(arg, struct cdrom_read_audio, ra);
2869
2870 if (ra.addr_format == CDROM_MSF)
2871 lba = msf_to_lba(ra.addr.msf.minute,
2872 ra.addr.msf.second,
2873 ra.addr.msf.frame);
2874 else if (ra.addr_format == CDROM_LBA)
2875 lba = ra.addr.lba;
2876 else
2877 return -EINVAL;
2878
2879 /* FIXME: we need upper bound checking, too!! */
2880 if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES)
2881 return -EINVAL;
2882
2883 return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes);
2884}
2885
2886static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi,
2887 void __user *arg)
2888{
2889 int ret;
2890 struct cdrom_subchnl q;
2891 u_char requested, back;
2892 IOCTL_IN(arg, struct cdrom_subchnl, q);
2893 requested = q.cdsc_format;
2894 if (!((requested == CDROM_MSF) ||
2895 (requested == CDROM_LBA)))
2896 return -EINVAL;
2897 q.cdsc_format = CDROM_MSF;
2898 ret = cdrom_read_subchannel(cdi, &q, 0);
2899 if (ret)
2851 return ret; 2900 return ret;
2852 } 2901 back = q.cdsc_format; /* local copy */
2853 case CDROMREADAUDIO: { 2902 sanitize_format(&q.cdsc_absaddr, &back, requested);
2854 struct cdrom_read_audio ra; 2903 sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested);
2855 int lba; 2904 IOCTL_OUT(arg, struct cdrom_subchnl, q);
2856 2905 /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */
2857 IOCTL_IN(arg, struct cdrom_read_audio, ra); 2906 return 0;
2858 2907}
2859 if (ra.addr_format == CDROM_MSF)
2860 lba = msf_to_lba(ra.addr.msf.minute,
2861 ra.addr.msf.second,
2862 ra.addr.msf.frame);
2863 else if (ra.addr_format == CDROM_LBA)
2864 lba = ra.addr.lba;
2865 else
2866 return -EINVAL;
2867 2908
2868 /* FIXME: we need upper bound checking, too!! */ 2909static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi,
2869 if (lba < 0 || ra.nframes <= 0 || ra.nframes > CD_FRAMES) 2910 void __user *arg,
2870 return -EINVAL; 2911 struct packet_command *cgc)
2912{
2913 struct cdrom_device_ops *cdo = cdi->ops;
2914 struct cdrom_msf msf;
2915 cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n");
2916 IOCTL_IN(arg, struct cdrom_msf, msf);
2917 cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF;
2918 cgc->cmd[3] = msf.cdmsf_min0;
2919 cgc->cmd[4] = msf.cdmsf_sec0;
2920 cgc->cmd[5] = msf.cdmsf_frame0;
2921 cgc->cmd[6] = msf.cdmsf_min1;
2922 cgc->cmd[7] = msf.cdmsf_sec1;
2923 cgc->cmd[8] = msf.cdmsf_frame1;
2924 cgc->data_direction = CGC_DATA_NONE;
2925 return cdo->generic_packet(cdi, cgc);
2926}
2871 2927
2872 return cdrom_read_cdda(cdi, ra.buf, lba, ra.nframes); 2928static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi,
2873 } 2929 void __user *arg,
2874 case CDROMSUBCHNL: { 2930 struct packet_command *cgc)
2875 struct cdrom_subchnl q; 2931{
2876 u_char requested, back; 2932 struct cdrom_device_ops *cdo = cdi->ops;
2877 IOCTL_IN(arg, struct cdrom_subchnl, q); 2933 struct cdrom_blk blk;
2878 requested = q.cdsc_format; 2934 cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
2879 if (!((requested == CDROM_MSF) || 2935 IOCTL_IN(arg, struct cdrom_blk, blk);
2880 (requested == CDROM_LBA))) 2936 cgc->cmd[0] = GPCMD_PLAY_AUDIO_10;
2881 return -EINVAL; 2937 cgc->cmd[2] = (blk.from >> 24) & 0xff;
2882 q.cdsc_format = CDROM_MSF; 2938 cgc->cmd[3] = (blk.from >> 16) & 0xff;
2883 if ((ret = cdrom_read_subchannel(cdi, &q, 0))) 2939 cgc->cmd[4] = (blk.from >> 8) & 0xff;
2884 return ret; 2940 cgc->cmd[5] = blk.from & 0xff;
2885 back = q.cdsc_format; /* local copy */ 2941 cgc->cmd[7] = (blk.len >> 8) & 0xff;
2886 sanitize_format(&q.cdsc_absaddr, &back, requested); 2942 cgc->cmd[8] = blk.len & 0xff;
2887 sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested); 2943 cgc->data_direction = CGC_DATA_NONE;
2888 IOCTL_OUT(arg, struct cdrom_subchnl, q); 2944 return cdo->generic_packet(cdi, cgc);
2889 /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ 2945}
2890 return 0; 2946
2891 } 2947static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi,
2892 case CDROMPLAYMSF: { 2948 void __user *arg,
2893 struct cdrom_msf msf; 2949 struct packet_command *cgc,
2894 cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); 2950 unsigned int cmd)
2895 IOCTL_IN(arg, struct cdrom_msf, msf); 2951{
2896 cgc.cmd[0] = GPCMD_PLAY_AUDIO_MSF; 2952 struct cdrom_volctrl volctrl;
2897 cgc.cmd[3] = msf.cdmsf_min0; 2953 unsigned char buffer[32];
2898 cgc.cmd[4] = msf.cdmsf_sec0; 2954 char mask[sizeof(buffer)];
2899 cgc.cmd[5] = msf.cdmsf_frame0; 2955 unsigned short offset;
2900 cgc.cmd[6] = msf.cdmsf_min1; 2956 int ret;
2901 cgc.cmd[7] = msf.cdmsf_sec1;
2902 cgc.cmd[8] = msf.cdmsf_frame1;
2903 cgc.data_direction = CGC_DATA_NONE;
2904 return cdo->generic_packet(cdi, &cgc);
2905 }
2906 case CDROMPLAYBLK: {
2907 struct cdrom_blk blk;
2908 cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n");
2909 IOCTL_IN(arg, struct cdrom_blk, blk);
2910 cgc.cmd[0] = GPCMD_PLAY_AUDIO_10;
2911 cgc.cmd[2] = (blk.from >> 24) & 0xff;
2912 cgc.cmd[3] = (blk.from >> 16) & 0xff;
2913 cgc.cmd[4] = (blk.from >> 8) & 0xff;
2914 cgc.cmd[5] = blk.from & 0xff;
2915 cgc.cmd[7] = (blk.len >> 8) & 0xff;
2916 cgc.cmd[8] = blk.len & 0xff;
2917 cgc.data_direction = CGC_DATA_NONE;
2918 return cdo->generic_packet(cdi, &cgc);
2919 }
2920 case CDROMVOLCTRL:
2921 case CDROMVOLREAD: {
2922 struct cdrom_volctrl volctrl;
2923 char mask[sizeof(buffer)];
2924 unsigned short offset;
2925 2957
2926 cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n"); 2958 cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n");
2927 2959
2928 IOCTL_IN(arg, struct cdrom_volctrl, volctrl); 2960 IOCTL_IN(arg, struct cdrom_volctrl, volctrl);
2929 2961
2930 cgc.buffer = buffer; 2962 cgc->buffer = buffer;
2931 cgc.buflen = 24; 2963 cgc->buflen = 24;
2932 if ((ret = cdrom_mode_sense(cdi, &cgc, GPMODE_AUDIO_CTL_PAGE, 0))) 2964 ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 0);
2933 return ret; 2965 if (ret)
2966 return ret;
2934 2967
2935 /* originally the code depended on buffer[1] to determine 2968 /* originally the code depended on buffer[1] to determine
2936 how much data is available for transfer. buffer[1] is 2969 how much data is available for transfer. buffer[1] is
2937 unfortunately ambiguous and the only reliable way seems 2970 unfortunately ambiguous and the only reliable way seems
2938 to be to simply skip over the block descriptor... */ 2971 to be to simply skip over the block descriptor... */
2939 offset = 8 + be16_to_cpu(*(__be16 *)(buffer+6)); 2972 offset = 8 + be16_to_cpu(*(__be16 *)(buffer + 6));
2940 2973
2941 if (offset + 16 > sizeof(buffer)) 2974 if (offset + 16 > sizeof(buffer))
2942 return -E2BIG; 2975 return -E2BIG;
2943 2976
2944 if (offset + 16 > cgc.buflen) { 2977 if (offset + 16 > cgc->buflen) {
2945 cgc.buflen = offset+16; 2978 cgc->buflen = offset + 16;
2946 ret = cdrom_mode_sense(cdi, &cgc, 2979 ret = cdrom_mode_sense(cdi, cgc,
2947 GPMODE_AUDIO_CTL_PAGE, 0); 2980 GPMODE_AUDIO_CTL_PAGE, 0);
2948 if (ret) 2981 if (ret)
2949 return ret; 2982 return ret;
2950 } 2983 }
2951 2984
2952 /* sanity check */ 2985 /* sanity check */
2953 if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE || 2986 if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE ||
2954 buffer[offset+1] < 14) 2987 buffer[offset + 1] < 14)
2955 return -EINVAL; 2988 return -EINVAL;
2956 2989
2957 /* now we have the current volume settings. if it was only 2990 /* now we have the current volume settings. if it was only
2958 a CDROMVOLREAD, return these values */ 2991 a CDROMVOLREAD, return these values */
2959 if (cmd == CDROMVOLREAD) { 2992 if (cmd == CDROMVOLREAD) {
2960 volctrl.channel0 = buffer[offset+9]; 2993 volctrl.channel0 = buffer[offset+9];
2961 volctrl.channel1 = buffer[offset+11]; 2994 volctrl.channel1 = buffer[offset+11];
2962 volctrl.channel2 = buffer[offset+13]; 2995 volctrl.channel2 = buffer[offset+13];
2963 volctrl.channel3 = buffer[offset+15]; 2996 volctrl.channel3 = buffer[offset+15];
2964 IOCTL_OUT(arg, struct cdrom_volctrl, volctrl); 2997 IOCTL_OUT(arg, struct cdrom_volctrl, volctrl);
2965 return 0; 2998 return 0;
2966 } 2999 }
2967 3000
2968 /* get the volume mask */ 3001 /* get the volume mask */
2969 cgc.buffer = mask; 3002 cgc->buffer = mask;
2970 if ((ret = cdrom_mode_sense(cdi, &cgc, 3003 ret = cdrom_mode_sense(cdi, cgc, GPMODE_AUDIO_CTL_PAGE, 1);
2971 GPMODE_AUDIO_CTL_PAGE, 1))) 3004 if (ret)
2972 return ret; 3005 return ret;
2973 3006
2974 buffer[offset+9] = volctrl.channel0 & mask[offset+9]; 3007 buffer[offset + 9] = volctrl.channel0 & mask[offset + 9];
2975 buffer[offset+11] = volctrl.channel1 & mask[offset+11]; 3008 buffer[offset + 11] = volctrl.channel1 & mask[offset + 11];
2976 buffer[offset+13] = volctrl.channel2 & mask[offset+13]; 3009 buffer[offset + 13] = volctrl.channel2 & mask[offset + 13];
2977 buffer[offset+15] = volctrl.channel3 & mask[offset+15]; 3010 buffer[offset + 15] = volctrl.channel3 & mask[offset + 15];
2978 3011
2979 /* set volume */ 3012 /* set volume */
2980 cgc.buffer = buffer + offset - 8; 3013 cgc->buffer = buffer + offset - 8;
2981 memset(cgc.buffer, 0, 8); 3014 memset(cgc->buffer, 0, 8);
2982 return cdrom_mode_select(cdi, &cgc); 3015 return cdrom_mode_select(cdi, cgc);
2983 } 3016}
2984 3017
2985 case CDROMSTART: 3018static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi,
2986 case CDROMSTOP: { 3019 struct packet_command *cgc,
2987 cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); 3020 int cmd)
2988 cgc.cmd[0] = GPCMD_START_STOP_UNIT; 3021{
2989 cgc.cmd[1] = 1; 3022 struct cdrom_device_ops *cdo = cdi->ops;
2990 cgc.cmd[4] = (cmd == CDROMSTART) ? 1 : 0; 3023 cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n");
2991 cgc.data_direction = CGC_DATA_NONE; 3024 cgc->cmd[0] = GPCMD_START_STOP_UNIT;
2992 return cdo->generic_packet(cdi, &cgc); 3025 cgc->cmd[1] = 1;
2993 } 3026 cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0;
3027 cgc->data_direction = CGC_DATA_NONE;
3028 return cdo->generic_packet(cdi, cgc);
3029}
2994 3030
2995 case CDROMPAUSE: 3031static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi,
2996 case CDROMRESUME: { 3032 struct packet_command *cgc,
2997 cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); 3033 int cmd)
2998 cgc.cmd[0] = GPCMD_PAUSE_RESUME; 3034{
2999 cgc.cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; 3035 struct cdrom_device_ops *cdo = cdi->ops;
3000 cgc.data_direction = CGC_DATA_NONE; 3036 cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n");
3001 return cdo->generic_packet(cdi, &cgc); 3037 cgc->cmd[0] = GPCMD_PAUSE_RESUME;
3002 } 3038 cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0;
3039 cgc->data_direction = CGC_DATA_NONE;
3040 return cdo->generic_packet(cdi, cgc);
3041}
3003 3042
3004 case DVD_READ_STRUCT: { 3043static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi,
3005 dvd_struct *s; 3044 void __user *arg,
3006 int size = sizeof(dvd_struct); 3045 struct packet_command *cgc)
3007 if (!CDROM_CAN(CDC_DVD)) 3046{
3008 return -ENOSYS; 3047 int ret;
3009 if ((s = kmalloc(size, GFP_KERNEL)) == NULL) 3048 dvd_struct *s;
3010 return -ENOMEM; 3049 int size = sizeof(dvd_struct);
3011 cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n"); 3050
3012 if (copy_from_user(s, (dvd_struct __user *)arg, size)) { 3051 if (!CDROM_CAN(CDC_DVD))
3013 kfree(s); 3052 return -ENOSYS;
3014 return -EFAULT; 3053
3015 } 3054 s = kmalloc(size, GFP_KERNEL);
3016 if ((ret = dvd_read_struct(cdi, s))) { 3055 if (!s)
3017 kfree(s); 3056 return -ENOMEM;
3018 return ret; 3057
3019 } 3058 cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n");
3020 if (copy_to_user((dvd_struct __user *)arg, s, size)) 3059 if (copy_from_user(s, arg, size)) {
3021 ret = -EFAULT;
3022 kfree(s); 3060 kfree(s);
3061 return -EFAULT;
3062 }
3063
3064 ret = dvd_read_struct(cdi, s, cgc);
3065 if (ret)
3066 goto out;
3067
3068 if (copy_to_user(arg, s, size))
3069 ret = -EFAULT;
3070out:
3071 kfree(s);
3072 return ret;
3073}
3074
3075static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi,
3076 void __user *arg)
3077{
3078 int ret;
3079 dvd_authinfo ai;
3080 if (!CDROM_CAN(CDC_DVD))
3081 return -ENOSYS;
3082 cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n");
3083 IOCTL_IN(arg, dvd_authinfo, ai);
3084 ret = dvd_do_auth(cdi, &ai);
3085 if (ret)
3023 return ret; 3086 return ret;
3024 } 3087 IOCTL_OUT(arg, dvd_authinfo, ai);
3088 return 0;
3089}
3025 3090
3026 case DVD_AUTH: { 3091static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi,
3027 dvd_authinfo ai; 3092 void __user *arg)
3028 if (!CDROM_CAN(CDC_DVD)) 3093{
3029 return -ENOSYS; 3094 int ret;
3030 cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n"); 3095 long next = 0;
3031 IOCTL_IN(arg, dvd_authinfo, ai); 3096 cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n");
3032 if ((ret = dvd_do_auth (cdi, &ai))) 3097 ret = cdrom_get_next_writable(cdi, &next);
3033 return ret; 3098 if (ret)
3034 IOCTL_OUT(arg, dvd_authinfo, ai); 3099 return ret;
3035 return 0; 3100 IOCTL_OUT(arg, long, next);
3036 } 3101 return 0;
3102}
3037 3103
3038 case CDROM_NEXT_WRITABLE: { 3104static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi,
3039 long next = 0; 3105 void __user *arg)
3040 cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); 3106{
3041 if ((ret = cdrom_get_next_writable(cdi, &next))) 3107 int ret;
3042 return ret; 3108 long last = 0;
3043 IOCTL_OUT(arg, long, next); 3109 cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n");
3044 return 0; 3110 ret = cdrom_get_last_written(cdi, &last);
3045 } 3111 if (ret)
3046 case CDROM_LAST_WRITTEN: { 3112 return ret;
3047 long last = 0; 3113 IOCTL_OUT(arg, long, last);
3048 cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); 3114 return 0;
3049 if ((ret = cdrom_get_last_written(cdi, &last))) 3115}
3050 return ret; 3116
3051 IOCTL_OUT(arg, long, last); 3117static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd,
3052 return 0; 3118 unsigned long arg)
3053 } 3119{
3054 } /* switch */ 3120 struct packet_command cgc;
3121 void __user *userptr = (void __user *)arg;
3122
3123 memset(&cgc, 0, sizeof(cgc));
3124
3125 /* build a unified command and queue it through
3126 cdo->generic_packet() */
3127 switch (cmd) {
3128 case CDROMREADRAW:
3129 case CDROMREADMODE1:
3130 case CDROMREADMODE2:
3131 return mmc_ioctl_cdrom_read_data(cdi, userptr, &cgc, cmd);
3132 case CDROMREADAUDIO:
3133 return mmc_ioctl_cdrom_read_audio(cdi, userptr);
3134 case CDROMSUBCHNL:
3135 return mmc_ioctl_cdrom_subchannel(cdi, userptr);
3136 case CDROMPLAYMSF:
3137 return mmc_ioctl_cdrom_play_msf(cdi, userptr, &cgc);
3138 case CDROMPLAYBLK:
3139 return mmc_ioctl_cdrom_play_blk(cdi, userptr, &cgc);
3140 case CDROMVOLCTRL:
3141 case CDROMVOLREAD:
3142 return mmc_ioctl_cdrom_volume(cdi, userptr, &cgc, cmd);
3143 case CDROMSTART:
3144 case CDROMSTOP:
3145 return mmc_ioctl_cdrom_start_stop(cdi, &cgc, cmd);
3146 case CDROMPAUSE:
3147 case CDROMRESUME:
3148 return mmc_ioctl_cdrom_pause_resume(cdi, &cgc, cmd);
3149 case DVD_READ_STRUCT:
3150 return mmc_ioctl_dvd_read_struct(cdi, userptr, &cgc);
3151 case DVD_AUTH:
3152 return mmc_ioctl_dvd_auth(cdi, userptr);
3153 case CDROM_NEXT_WRITABLE:
3154 return mmc_ioctl_cdrom_next_writable(cdi, userptr);
3155 case CDROM_LAST_WRITTEN:
3156 return mmc_ioctl_cdrom_last_written(cdi, userptr);
3157 }
3055 3158
3056 return -ENOTTY; 3159 return -ENOTTY;
3057} 3160}
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index ce26c84af064..3326750ec02c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1060,7 +1060,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1060 goto bad_page_pool; 1060 goto bad_page_pool;
1061 } 1061 }
1062 1062
1063 cc->bs = bioset_create(MIN_IOS, MIN_IOS); 1063 cc->bs = bioset_create(MIN_IOS, 0);
1064 if (!cc->bs) { 1064 if (!cc->bs) {
1065 ti->error = "Cannot allocate crypt bioset"; 1065 ti->error = "Cannot allocate crypt bioset";
1066 goto bad_bs; 1066 goto bad_bs;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2fd6d4450637..a34338567a2a 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -56,7 +56,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
56 if (!client->pool) 56 if (!client->pool)
57 goto bad; 57 goto bad;
58 58
59 client->bios = bioset_create(16, 16); 59 client->bios = bioset_create(16, 0);
60 if (!client->bios) 60 if (!client->bios)
61 goto bad; 61 goto bad;
62 62
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 343094c3feeb..421c9f02d8ca 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1093,7 +1093,7 @@ static struct mapped_device *alloc_dev(int minor)
1093 if (!md->tio_pool) 1093 if (!md->tio_pool)
1094 goto bad_tio_pool; 1094 goto bad_tio_pool;
1095 1095
1096 md->bs = bioset_create(16, 16); 1096 md->bs = bioset_create(16, 0);
1097 if (!md->bs) 1097 if (!md->bs)
1098 goto bad_no_bioset; 1098 goto bad_no_bioset;
1099 1099
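
The three device-mapper hunks above all change the second argument of bioset_create() from a biovec pool size to 0. Read together with the fs/bio.c changes below (which introduce bs->front_pad and a shared bvec pool), that argument now appears to be the per-bio front padding in bytes; passing 0 keeps the old behaviour for callers that need no padding. A sketch of how a caller could use the padding to embed its own per-bio context, an illustration only (struct demo_io and demo_init() are hypothetical, and the bioset_create() prototype is inferred from the callers here rather than shown in these hunks):

#include <linux/bio.h>
#include <linux/stddef.h>
#include <linux/errno.h>

struct demo_io {
	void *context;		/* driver data carried in the front pad */
	struct bio bio;		/* keep the bio as the last member */
};

static struct bio_set *demo_bs;

static int demo_init(void)
{
	/* 16 bios in the pool, offsetof(struct demo_io, bio) bytes of padding
	 * in front of each bio, so container_of(bio, struct demo_io, bio)
	 * recovers the context without a second allocation */
	demo_bs = bioset_create(16, offsetof(struct demo_io, bio));
	return demo_bs ? 0 : -ENOMEM;
}
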
diff --git a/fs/aio.c b/fs/aio.c
index f658441d5666..d6f89d3c15e8 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -191,6 +191,20 @@ static int aio_setup_ring(struct kioctx *ctx)
191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \ 191 kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
192} while(0) 192} while(0)
193 193
194static void ctx_rcu_free(struct rcu_head *head)
195{
196 struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
197 unsigned nr_events = ctx->max_reqs;
198
199 kmem_cache_free(kioctx_cachep, ctx);
200
201 if (nr_events) {
202 spin_lock(&aio_nr_lock);
203 BUG_ON(aio_nr - nr_events > aio_nr);
204 aio_nr -= nr_events;
205 spin_unlock(&aio_nr_lock);
206 }
207}
194 208
195/* __put_ioctx 209/* __put_ioctx
196 * Called when the last user of an aio context has gone away, 210 * Called when the last user of an aio context has gone away,
@@ -198,8 +212,6 @@ static int aio_setup_ring(struct kioctx *ctx)
198 */ 212 */
199static void __put_ioctx(struct kioctx *ctx) 213static void __put_ioctx(struct kioctx *ctx)
200{ 214{
201 unsigned nr_events = ctx->max_reqs;
202
203 BUG_ON(ctx->reqs_active); 215 BUG_ON(ctx->reqs_active);
204 216
205 cancel_delayed_work(&ctx->wq); 217 cancel_delayed_work(&ctx->wq);
@@ -208,14 +220,7 @@ static void __put_ioctx(struct kioctx *ctx)
208 mmdrop(ctx->mm); 220 mmdrop(ctx->mm);
209 ctx->mm = NULL; 221 ctx->mm = NULL;
210 pr_debug("__put_ioctx: freeing %p\n", ctx); 222 pr_debug("__put_ioctx: freeing %p\n", ctx);
211 kmem_cache_free(kioctx_cachep, ctx); 223 call_rcu(&ctx->rcu_head, ctx_rcu_free);
212
213 if (nr_events) {
214 spin_lock(&aio_nr_lock);
215 BUG_ON(aio_nr - nr_events > aio_nr);
216 aio_nr -= nr_events;
217 spin_unlock(&aio_nr_lock);
218 }
219} 224}
220 225
221#define get_ioctx(kioctx) do { \ 226#define get_ioctx(kioctx) do { \
@@ -235,6 +240,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
235{ 240{
236 struct mm_struct *mm; 241 struct mm_struct *mm;
237 struct kioctx *ctx; 242 struct kioctx *ctx;
243 int did_sync = 0;
238 244
239 /* Prevent overflows */ 245 /* Prevent overflows */
240 if ((nr_events > (0x10000000U / sizeof(struct io_event))) || 246 if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
@@ -267,21 +273,30 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
267 goto out_freectx; 273 goto out_freectx;
268 274
269 /* limit the number of system wide aios */ 275 /* limit the number of system wide aios */
270 spin_lock(&aio_nr_lock); 276 do {
271 if (aio_nr + ctx->max_reqs > aio_max_nr || 277 spin_lock_bh(&aio_nr_lock);
272 aio_nr + ctx->max_reqs < aio_nr) 278 if (aio_nr + nr_events > aio_max_nr ||
273 ctx->max_reqs = 0; 279 aio_nr + nr_events < aio_nr)
274 else 280 ctx->max_reqs = 0;
275 aio_nr += ctx->max_reqs; 281 else
276 spin_unlock(&aio_nr_lock); 282 aio_nr += ctx->max_reqs;
283 spin_unlock_bh(&aio_nr_lock);
284 if (ctx->max_reqs || did_sync)
285 break;
286
287 /* wait for rcu callbacks to have completed before giving up */
288 synchronize_rcu();
289 did_sync = 1;
290 ctx->max_reqs = nr_events;
291 } while (1);
292
277 if (ctx->max_reqs == 0) 293 if (ctx->max_reqs == 0)
278 goto out_cleanup; 294 goto out_cleanup;
279 295
280 /* now link into global list. */ 296 /* now link into global list. */
281 write_lock(&mm->ioctx_list_lock); 297 spin_lock(&mm->ioctx_lock);
282 ctx->next = mm->ioctx_list; 298 hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
283 mm->ioctx_list = ctx; 299 spin_unlock(&mm->ioctx_lock);
284 write_unlock(&mm->ioctx_list_lock);
285 300
286 dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", 301 dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
287 ctx, ctx->user_id, current->mm, ctx->ring_info.nr); 302 ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
@@ -375,11 +390,12 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
375 */ 390 */
376void exit_aio(struct mm_struct *mm) 391void exit_aio(struct mm_struct *mm)
377{ 392{
378 struct kioctx *ctx = mm->ioctx_list; 393 struct kioctx *ctx;
379 mm->ioctx_list = NULL; 394
380 while (ctx) { 395 while (!hlist_empty(&mm->ioctx_list)) {
381 struct kioctx *next = ctx->next; 396 ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
382 ctx->next = NULL; 397 hlist_del_rcu(&ctx->list);
398
383 aio_cancel_all(ctx); 399 aio_cancel_all(ctx);
384 400
385 wait_for_all_aios(ctx); 401 wait_for_all_aios(ctx);
@@ -394,7 +410,6 @@ void exit_aio(struct mm_struct *mm)
394 atomic_read(&ctx->users), ctx->dead, 410 atomic_read(&ctx->users), ctx->dead,
395 ctx->reqs_active); 411 ctx->reqs_active);
396 put_ioctx(ctx); 412 put_ioctx(ctx);
397 ctx = next;
398 } 413 }
399} 414}
400 415
@@ -555,19 +570,21 @@ int aio_put_req(struct kiocb *req)
555 570
556static struct kioctx *lookup_ioctx(unsigned long ctx_id) 571static struct kioctx *lookup_ioctx(unsigned long ctx_id)
557{ 572{
558 struct kioctx *ioctx; 573 struct mm_struct *mm = current->mm;
559 struct mm_struct *mm; 574 struct kioctx *ctx = NULL;
575 struct hlist_node *n;
560 576
561 mm = current->mm; 577 rcu_read_lock();
562 read_lock(&mm->ioctx_list_lock); 578
563 for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next) 579 hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
564 if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) { 580 if (ctx->user_id == ctx_id && !ctx->dead) {
565 get_ioctx(ioctx); 581 get_ioctx(ctx);
566 break; 582 break;
567 } 583 }
568 read_unlock(&mm->ioctx_list_lock); 584 }
569 585
570 return ioctx; 586 rcu_read_unlock();
587 return ctx;
571} 588}
572 589
573/* 590/*
@@ -1215,19 +1232,14 @@ out:
1215static void io_destroy(struct kioctx *ioctx) 1232static void io_destroy(struct kioctx *ioctx)
1216{ 1233{
1217 struct mm_struct *mm = current->mm; 1234 struct mm_struct *mm = current->mm;
1218 struct kioctx **tmp;
1219 int was_dead; 1235 int was_dead;
1220 1236
1221 /* delete the entry from the list is someone else hasn't already */ 1237 /* delete the entry from the list is someone else hasn't already */
1222 write_lock(&mm->ioctx_list_lock); 1238 spin_lock(&mm->ioctx_lock);
1223 was_dead = ioctx->dead; 1239 was_dead = ioctx->dead;
1224 ioctx->dead = 1; 1240 ioctx->dead = 1;
1225 for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx; 1241 hlist_del_rcu(&ioctx->list);
1226 tmp = &(*tmp)->next) 1242 spin_unlock(&mm->ioctx_lock);
1227 ;
1228 if (*tmp)
1229 *tmp = ioctx->next;
1230 write_unlock(&mm->ioctx_list_lock);
1231 1243
1232 dprintk("aio_release(%p)\n", ioctx); 1244 dprintk("aio_release(%p)\n", ioctx);
1233 if (likely(!was_dead)) 1245 if (likely(!was_dead))
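
The fs/aio.c hunks above convert mm->ioctx_list from an rwlock-protected singly linked list into an RCU-protected hlist: lookups walk the list under rcu_read_lock(), writers add and remove entries with the _rcu helpers under mm->ioctx_lock, and the final free is deferred through call_rcu() (ctx_rcu_free()). A minimal sketch of that pattern, not the aio code itself (all demo_* names are hypothetical):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/slab.h>

struct demo_ctx {
	unsigned long id;
	struct hlist_node list;
	struct rcu_head rcu_head;
};

static HLIST_HEAD(demo_list);
static DEFINE_SPINLOCK(demo_lock);

static struct demo_ctx *demo_lookup(unsigned long id)
{
	struct demo_ctx *ctx;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ctx, n, &demo_list, list) {
		if (ctx->id == id)
			goto out;	/* real code must take a reference here,
					 * as lookup_ioctx() does with get_ioctx() */
	}
	ctx = NULL;
out:
	rcu_read_unlock();
	return ctx;
}

static void demo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct demo_ctx, rcu_head));
}

static void demo_remove(struct demo_ctx *ctx)
{
	spin_lock(&demo_lock);
	hlist_del_rcu(&ctx->list);
	spin_unlock(&demo_lock);
	call_rcu(&ctx->rcu_head, demo_free_rcu);	/* free after readers drain */
}
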
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 19caf7c962ac..77ebc3c263d6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -111,7 +111,7 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs)
111 && bip->bip_buf != NULL) 111 && bip->bip_buf != NULL)
112 kfree(bip->bip_buf); 112 kfree(bip->bip_buf);
113 113
114 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); 114 bvec_free_bs(bs, bip->bip_vec, bip->bip_pool);
115 mempool_free(bip, bs->bio_integrity_pool); 115 mempool_free(bip, bs->bio_integrity_pool);
116 116
117 bio->bi_integrity = NULL; 117 bio->bi_integrity = NULL;
diff --git a/fs/bio.c b/fs/bio.c
index df99c882b807..711cee103602 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -31,7 +31,11 @@
31 31
32DEFINE_TRACE(block_split); 32DEFINE_TRACE(block_split);
33 33
34static struct kmem_cache *bio_slab __read_mostly; 34/*
35 * Test patch to inline a certain number of bi_io_vec's inside the bio
36 * itself, to shrink a bio data allocation from two mempool calls to one
37 */
38#define BIO_INLINE_VECS 4
35 39
36static mempool_t *bio_split_pool __read_mostly; 40static mempool_t *bio_split_pool __read_mostly;
37 41
@@ -40,9 +44,8 @@ static mempool_t *bio_split_pool __read_mostly;
40 * break badly! cannot be bigger than what you can fit into an 44 * break badly! cannot be bigger than what you can fit into an
41 * unsigned short 45 * unsigned short
42 */ 46 */
43
44#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) } 47#define BV(x) { .nr_vecs = x, .name = "biovec-"__stringify(x) }
45static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { 48struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
46 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES), 49 BV(1), BV(4), BV(16), BV(64), BV(128), BV(BIO_MAX_PAGES),
47}; 50};
48#undef BV 51#undef BV
@@ -53,12 +56,121 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
53 */ 56 */
54struct bio_set *fs_bio_set; 57struct bio_set *fs_bio_set;
55 58
59/*
60 * Our slab pool management
61 */
62struct bio_slab {
63 struct kmem_cache *slab;
64 unsigned int slab_ref;
65 unsigned int slab_size;
66 char name[8];
67};
68static DEFINE_MUTEX(bio_slab_lock);
69static struct bio_slab *bio_slabs;
70static unsigned int bio_slab_nr, bio_slab_max;
71
72static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
73{
74 unsigned int sz = sizeof(struct bio) + extra_size;
75 struct kmem_cache *slab = NULL;
76 struct bio_slab *bslab;
77 unsigned int i, entry = -1;
78
79 mutex_lock(&bio_slab_lock);
80
81 i = 0;
82 while (i < bio_slab_nr) {
83 struct bio_slab *bslab = &bio_slabs[i];
84
85 if (!bslab->slab && entry == -1)
86 entry = i;
87 else if (bslab->slab_size == sz) {
88 slab = bslab->slab;
89 bslab->slab_ref++;
90 break;
91 }
92 i++;
93 }
94
95 if (slab)
96 goto out_unlock;
97
98 if (bio_slab_nr == bio_slab_max && entry == -1) {
99 bio_slab_max <<= 1;
100 bio_slabs = krealloc(bio_slabs,
101 bio_slab_max * sizeof(struct bio_slab),
102 GFP_KERNEL);
103 if (!bio_slabs)
104 goto out_unlock;
105 }
106 if (entry == -1)
107 entry = bio_slab_nr++;
108
109 bslab = &bio_slabs[entry];
110
111 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
112 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL);
113 if (!slab)
114 goto out_unlock;
115
116 printk("bio: create slab <%s> at %d\n", bslab->name, entry);
117 bslab->slab = slab;
118 bslab->slab_ref = 1;
119 bslab->slab_size = sz;
120out_unlock:
121 mutex_unlock(&bio_slab_lock);
122 return slab;
123}
124
125static void bio_put_slab(struct bio_set *bs)
126{
127 struct bio_slab *bslab = NULL;
128 unsigned int i;
129
130 mutex_lock(&bio_slab_lock);
131
132 for (i = 0; i < bio_slab_nr; i++) {
133 if (bs->bio_slab == bio_slabs[i].slab) {
134 bslab = &bio_slabs[i];
135 break;
136 }
137 }
138
139 if (WARN(!bslab, KERN_ERR "bio: unable to find slab!\n"))
140 goto out;
141
142 WARN_ON(!bslab->slab_ref);
143
144 if (--bslab->slab_ref)
145 goto out;
146
147 kmem_cache_destroy(bslab->slab);
148 bslab->slab = NULL;
149
150out:
151 mutex_unlock(&bio_slab_lock);
152}
153
56unsigned int bvec_nr_vecs(unsigned short idx) 154unsigned int bvec_nr_vecs(unsigned short idx)
57{ 155{
58 return bvec_slabs[idx].nr_vecs; 156 return bvec_slabs[idx].nr_vecs;
59} 157}
60 158
61struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) 159void bvec_free_bs(struct bio_set *bs, struct bio_vec *bv, unsigned int idx)
160{
161 BIO_BUG_ON(idx >= BIOVEC_NR_POOLS);
162
163 if (idx == BIOVEC_MAX_IDX)
164 mempool_free(bv, bs->bvec_pool);
165 else {
166 struct biovec_slab *bvs = bvec_slabs + idx;
167
168 kmem_cache_free(bvs->slab, bv);
169 }
170}
171
172struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx,
173 struct bio_set *bs)
62{ 174{
63 struct bio_vec *bvl; 175 struct bio_vec *bvl;
64 176
@@ -67,60 +179,85 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
67 * If not, this is a bio_kmalloc() allocation and just do a 179 * If not, this is a bio_kmalloc() allocation and just do a
68 * kzalloc() for the exact number of vecs right away. 180 * kzalloc() for the exact number of vecs right away.
69 */ 181 */
70 if (bs) { 182 if (!bs)
183 bvl = kmalloc(nr * sizeof(struct bio_vec), gfp_mask);
184
185 /*
186 * see comment near bvec_array define!
187 */
188 switch (nr) {
189 case 1:
190 *idx = 0;
191 break;
192 case 2 ... 4:
193 *idx = 1;
194 break;
195 case 5 ... 16:
196 *idx = 2;
197 break;
198 case 17 ... 64:
199 *idx = 3;
200 break;
201 case 65 ... 128:
202 *idx = 4;
203 break;
204 case 129 ... BIO_MAX_PAGES:
205 *idx = 5;
206 break;
207 default:
208 return NULL;
209 }
210
211 /*
212 * idx now points to the pool we want to allocate from. only the
213 * 1-vec entry pool is mempool backed.
214 */
215 if (*idx == BIOVEC_MAX_IDX) {
216fallback:
217 bvl = mempool_alloc(bs->bvec_pool, gfp_mask);
218 } else {
219 struct biovec_slab *bvs = bvec_slabs + *idx;
220 gfp_t __gfp_mask = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
221
71 /* 222 /*
72 * see comment near bvec_array define! 223 * Make this allocation restricted and don't dump info on
224 * allocation failures, since we'll fallback to the mempool
225 * in case of failure.
73 */ 226 */
74 switch (nr) { 227 __gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
75 case 1:
76 *idx = 0;
77 break;
78 case 2 ... 4:
79 *idx = 1;
80 break;
81 case 5 ... 16:
82 *idx = 2;
83 break;
84 case 17 ... 64:
85 *idx = 3;
86 break;
87 case 65 ... 128:
88 *idx = 4;
89 break;
90 case 129 ... BIO_MAX_PAGES:
91 *idx = 5;
92 break;
93 default:
94 return NULL;
95 }
96 228
97 /* 229 /*
98 * idx now points to the pool we want to allocate from 230 * Try a slab allocation. If this fails and __GFP_WAIT
231 * is set, retry with the 1-entry mempool
99 */ 232 */
100 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 233 bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
101 if (bvl) 234 if (unlikely(!bvl && (gfp_mask & __GFP_WAIT))) {
102 memset(bvl, 0, 235 *idx = BIOVEC_MAX_IDX;
103 bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); 236 goto fallback;
104 } else 237 }
105 bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); 238 }
106 239
107 return bvl; 240 return bvl;
108} 241}
109 242
110void bio_free(struct bio *bio, struct bio_set *bio_set) 243void bio_free(struct bio *bio, struct bio_set *bs)
111{ 244{
112 if (bio->bi_io_vec) { 245 void *p;
113 const int pool_idx = BIO_POOL_IDX(bio);
114 246
115 BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); 247 if (bio_has_allocated_vec(bio))
116 248 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
117 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 }
119 249
120 if (bio_integrity(bio)) 250 if (bio_integrity(bio))
121 bio_integrity_free(bio, bio_set); 251 bio_integrity_free(bio, bs);
252
253 /*
254 * If we have front padding, adjust the bio pointer before freeing
255 */
256 p = bio;
257 if (bs->front_pad)
258 p -= bs->front_pad;
122 259
123 mempool_free(bio, bio_set->bio_pool); 260 mempool_free(p, bs->bio_pool);
124} 261}
125 262
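Out-of-line biovecs allocated by bvec_alloc_bs() must be returned to the same pool, which is why the pool index is handed back to the caller. A hedged sketch of the pairing (the bio_set pointer bs is assumed to exist):

	unsigned long idx;
	struct bio_vec *bvl;

	/* idx is written by bvec_alloc_bs() and selects the slab or mempool */
	bvl = bvec_alloc_bs(GFP_NOIO, 16, &idx, bs);
	if (bvl)
		bvec_free_bs(bs, bvl, idx);	/* must pass the same idx back */
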
126/* 263/*
@@ -133,7 +270,8 @@ static void bio_fs_destructor(struct bio *bio)
133 270
134static void bio_kmalloc_destructor(struct bio *bio) 271static void bio_kmalloc_destructor(struct bio *bio)
135{ 272{
136 kfree(bio->bi_io_vec); 273 if (bio_has_allocated_vec(bio))
274 kfree(bio->bi_io_vec);
137 kfree(bio); 275 kfree(bio);
138} 276}
139 277
@@ -157,16 +295,20 @@ void bio_init(struct bio *bio)
157 * for a &struct bio to become free. If a %NULL @bs is passed in, we will 295 * for a &struct bio to become free. If a %NULL @bs is passed in, we will
158 * fall back to just using @kmalloc to allocate the required memory. 296 * fall back to just using @kmalloc to allocate the required memory.
159 * 297 *
160 * allocate bio and iovecs from the memory pools specified by the 298 * Note that the caller must set ->bi_destructor on successful return
161 * bio_set structure, or @kmalloc if none given. 299 * of a bio, to do the appropriate freeing of the bio once the reference
300 * count drops to zero.
162 **/ 301 **/
163struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 302struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
164{ 303{
165 struct bio *bio; 304 struct bio *bio = NULL;
305
306 if (bs) {
307 void *p = mempool_alloc(bs->bio_pool, gfp_mask);
166 308
167 if (bs) 309 if (p)
168 bio = mempool_alloc(bs->bio_pool, gfp_mask); 310 bio = p + bs->front_pad;
169 else 311 } else
170 bio = kmalloc(sizeof(*bio), gfp_mask); 312 bio = kmalloc(sizeof(*bio), gfp_mask);
171 313
172 if (likely(bio)) { 314 if (likely(bio)) {
@@ -176,7 +318,15 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
176 if (likely(nr_iovecs)) { 318 if (likely(nr_iovecs)) {
177 unsigned long uninitialized_var(idx); 319 unsigned long uninitialized_var(idx);
178 320
179 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 321 if (nr_iovecs <= BIO_INLINE_VECS) {
322 idx = 0;
323 bvl = bio->bi_inline_vecs;
324 nr_iovecs = BIO_INLINE_VECS;
325 } else {
326 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx,
327 bs);
328 nr_iovecs = bvec_nr_vecs(idx);
329 }
180 if (unlikely(!bvl)) { 330 if (unlikely(!bvl)) {
181 if (bs) 331 if (bs)
182 mempool_free(bio, bs->bio_pool); 332 mempool_free(bio, bs->bio_pool);
@@ -186,7 +336,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
186 goto out; 336 goto out;
187 } 337 }
188 bio->bi_flags |= idx << BIO_POOL_OFFSET; 338 bio->bi_flags |= idx << BIO_POOL_OFFSET;
189 bio->bi_max_vecs = bvec_nr_vecs(idx); 339 bio->bi_max_vecs = nr_iovecs;
190 } 340 }
191 bio->bi_io_vec = bvl; 341 bio->bi_io_vec = bvl;
192 } 342 }
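As the updated comment above bio_alloc_bioset() says, freeing is now the caller's business via ->bi_destructor. A minimal sketch of a conforming caller (my_bio_set, my_bio_destructor and my_alloc_bio are hypothetical names):

	static struct bio_set *my_bio_set;	/* assumed to be set up elsewhere */

	static void my_bio_destructor(struct bio *bio)
	{
		bio_free(bio, my_bio_set);
	}

	static struct bio *my_alloc_bio(void)
	{
		struct bio *bio = bio_alloc_bioset(GFP_NOIO, 4, my_bio_set);

		if (bio)
			bio->bi_destructor = my_bio_destructor;
		return bio;
	}
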
@@ -1346,30 +1496,18 @@ EXPORT_SYMBOL(bio_sector_offset);
1346 */ 1496 */
1347static int biovec_create_pools(struct bio_set *bs, int pool_entries) 1497static int biovec_create_pools(struct bio_set *bs, int pool_entries)
1348{ 1498{
1349 int i; 1499 struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
1350 1500
1351 for (i = 0; i < BIOVEC_NR_POOLS; i++) { 1501 bs->bvec_pool = mempool_create_slab_pool(pool_entries, bp->slab);
1352 struct biovec_slab *bp = bvec_slabs + i; 1502 if (!bs->bvec_pool)
1353 mempool_t **bvp = bs->bvec_pools + i; 1503 return -ENOMEM;
1354 1504
1355 *bvp = mempool_create_slab_pool(pool_entries, bp->slab);
1356 if (!*bvp)
1357 return -ENOMEM;
1358 }
1359 return 0; 1505 return 0;
1360} 1506}
1361 1507
1362static void biovec_free_pools(struct bio_set *bs) 1508static void biovec_free_pools(struct bio_set *bs)
1363{ 1509{
1364 int i; 1510 mempool_destroy(bs->bvec_pool);
1365
1366 for (i = 0; i < BIOVEC_NR_POOLS; i++) {
1367 mempool_t *bvp = bs->bvec_pools[i];
1368
1369 if (bvp)
1370 mempool_destroy(bvp);
1371 }
1372
1373} 1511}
1374 1512
1375void bioset_free(struct bio_set *bs) 1513void bioset_free(struct bio_set *bs)
@@ -1379,25 +1517,49 @@ void bioset_free(struct bio_set *bs)
1379 1517
1380 bioset_integrity_free(bs); 1518 bioset_integrity_free(bs);
1381 biovec_free_pools(bs); 1519 biovec_free_pools(bs);
1520 bio_put_slab(bs);
1382 1521
1383 kfree(bs); 1522 kfree(bs);
1384} 1523}
1385 1524
1386struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size) 1525/**
1526 * bioset_create - Create a bio_set
1527 * @pool_size: Number of bio and bio_vecs to cache in the mempool
1528 * @front_pad: Number of bytes to allocate in front of the returned bio
1529 *
1530 * Description:
1531 * Set up a bio_set to be used with @bio_alloc_bioset. Allows the caller
1532 * to ask for a number of bytes to be allocated in front of the bio.
1533 * Front pad allocation is useful for embedding the bio inside
1534 * another structure, to avoid allocating extra data to go with the bio.
1535 * Note that the bio must be embedded at the END of that structure always,
1536 * or things will break badly.
1537 */
1538struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
1387{ 1539{
1388 struct bio_set *bs = kzalloc(sizeof(*bs), GFP_KERNEL); 1540 unsigned int back_pad = BIO_INLINE_VECS * sizeof(struct bio_vec);
1541 struct bio_set *bs;
1389 1542
1543 bs = kzalloc(sizeof(*bs), GFP_KERNEL);
1390 if (!bs) 1544 if (!bs)
1391 return NULL; 1545 return NULL;
1392 1546
1393 bs->bio_pool = mempool_create_slab_pool(bio_pool_size, bio_slab); 1547 bs->front_pad = front_pad;
1548
1549 bs->bio_slab = bio_find_or_create_slab(front_pad + back_pad);
1550 if (!bs->bio_slab) {
1551 kfree(bs);
1552 return NULL;
1553 }
1554
1555 bs->bio_pool = mempool_create_slab_pool(pool_size, bs->bio_slab);
1394 if (!bs->bio_pool) 1556 if (!bs->bio_pool)
1395 goto bad; 1557 goto bad;
1396 1558
1397 if (bioset_integrity_create(bs, bio_pool_size)) 1559 if (bioset_integrity_create(bs, pool_size))
1398 goto bad; 1560 goto bad;
1399 1561
1400 if (!biovec_create_pools(bs, bvec_pool_size)) 1562 if (!biovec_create_pools(bs, pool_size))
1401 return bs; 1563 return bs;
1402 1564
1403bad: 1565bad:
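The front_pad described in the bioset_create() comment is meant for embedding the bio at the tail of a larger per-I/O structure. A sketch under that assumption (struct my_io, my_bs and the helpers are hypothetical):

	struct my_io {
		void		*private;
		struct bio	bio;	/* must stay the last member */
	};

	static struct bio_set *my_bs;

	static int my_init(void)
	{
		/* reserve room for struct my_io in front of every bio from this set */
		my_bs = bioset_create(BIO_POOL_SIZE, offsetof(struct my_io, bio));
		return my_bs ? 0 : -ENOMEM;
	}

	static struct my_io *my_io_alloc(gfp_t gfp)
	{
		struct bio *bio = bio_alloc_bioset(gfp, 1, my_bs);

		return bio ? container_of(bio, struct my_io, bio) : NULL;
	}
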
@@ -1421,12 +1583,16 @@ static void __init biovec_init_slabs(void)
1421 1583
1422static int __init init_bio(void) 1584static int __init init_bio(void)
1423{ 1585{
1424 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 1586 bio_slab_max = 2;
1587 bio_slab_nr = 0;
1588 bio_slabs = kzalloc(bio_slab_max * sizeof(struct bio_slab), GFP_KERNEL);
1589 if (!bio_slabs)
1590 panic("bio: can't allocate bios\n");
1425 1591
1426 bio_integrity_init_slab(); 1592 bio_integrity_init_slab();
1427 biovec_init_slabs(); 1593 biovec_init_slabs();
1428 1594
1429 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); 1595 fs_bio_set = bioset_create(BIO_POOL_SIZE, 0);
1430 if (!fs_bio_set) 1596 if (!fs_bio_set)
1431 panic("bio: can't allocate bios\n"); 1597 panic("bio: can't allocate bios\n");
1432 1598
diff --git a/fs/buffer.c b/fs/buffer.c
index 10179cfa1152..776ae091d3b0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -99,10 +99,18 @@ __clear_page_buffers(struct page *page)
99 page_cache_release(page); 99 page_cache_release(page);
100} 100}
101 101
102
103static int quiet_error(struct buffer_head *bh)
104{
105 if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
106 return 0;
107 return 1;
108}
109
110
102static void buffer_io_error(struct buffer_head *bh) 111static void buffer_io_error(struct buffer_head *bh)
103{ 112{
104 char b[BDEVNAME_SIZE]; 113 char b[BDEVNAME_SIZE];
105
106 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", 114 printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
107 bdevname(bh->b_bdev, b), 115 bdevname(bh->b_bdev, b),
108 (unsigned long long)bh->b_blocknr); 116 (unsigned long long)bh->b_blocknr);
@@ -144,7 +152,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
144 if (uptodate) { 152 if (uptodate) {
145 set_buffer_uptodate(bh); 153 set_buffer_uptodate(bh);
146 } else { 154 } else {
147 if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { 155 if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
148 buffer_io_error(bh); 156 buffer_io_error(bh);
149 printk(KERN_WARNING "lost page write due to " 157 printk(KERN_WARNING "lost page write due to "
150 "I/O error on %s\n", 158 "I/O error on %s\n",
@@ -394,7 +402,7 @@ static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
394 set_buffer_uptodate(bh); 402 set_buffer_uptodate(bh);
395 } else { 403 } else {
396 clear_buffer_uptodate(bh); 404 clear_buffer_uptodate(bh);
397 if (printk_ratelimit()) 405 if (!quiet_error(bh))
398 buffer_io_error(bh); 406 buffer_io_error(bh);
399 SetPageError(page); 407 SetPageError(page);
400 } 408 }
@@ -455,7 +463,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
455 if (uptodate) { 463 if (uptodate) {
456 set_buffer_uptodate(bh); 464 set_buffer_uptodate(bh);
457 } else { 465 } else {
458 if (printk_ratelimit()) { 466 if (!quiet_error(bh)) {
459 buffer_io_error(bh); 467 buffer_io_error(bh);
460 printk(KERN_WARNING "lost page write due to " 468 printk(KERN_WARNING "lost page write due to "
461 "I/O error on %s\n", 469 "I/O error on %s\n",
@@ -2913,6 +2921,9 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
2913 set_bit(BH_Eopnotsupp, &bh->b_state); 2921 set_bit(BH_Eopnotsupp, &bh->b_state);
2914 } 2922 }
2915 2923
2924 if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
2925 set_bit(BH_Quiet, &bh->b_state);
2926
2916 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); 2927 bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags));
2917 bio_put(bio); 2928 bio_put(bio);
2918} 2929}
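Since end_bio_bh_io_sync() above copies BIO_QUIET into BH_Quiet, other completion handlers can honour the bit the same way quiet_error() does. A hedged sketch of a custom b_end_io following that convention (my_end_io is a hypothetical name):

	static void my_end_io(struct buffer_head *bh, int uptodate)
	{
		/* stay silent if the submitter marked this I/O quiet */
		if (!uptodate && !test_bit(BH_Quiet, &bh->b_state) &&
		    printk_ratelimit())
			printk(KERN_ERR "my_end_io: I/O error on block %llu\n",
			       (unsigned long long)bh->b_blocknr);
		unlock_buffer(bh);
		put_bh(bh);
	}
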
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e4a241c65dbe..04158ad74dbb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1721,7 +1721,7 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
1721 /* small i_blocks in vfs inode? */ 1721 /* small i_blocks in vfs inode? */
1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1722 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1723 /* 1723 /*
1724 * CONFIG_LSF is not enabled implies the inode 1724 * CONFIG_LBD is not enabled implies the inode
1725 * i_block represent total blocks in 512 bytes 1725 * i_block represent total blocks in 512 bytes
1726 * 32 == size of vfs inode i_blocks * 8 1726 * 32 == size of vfs inode i_blocks * 8
1727 */ 1727 */
@@ -1764,7 +1764,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1764 1764
1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { 1765 if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1766 /* 1766 /*
1767 * !has_huge_files or CONFIG_LSF is not enabled 1767 * !has_huge_files or CONFIG_LBD is not enabled
1768 * implies the inode i_block represent total blocks in 1768 * implies the inode i_block represent total blocks in
1769 * 512 bytes 32 == size of vfs inode i_blocks * 8 1769 * 512 bytes 32 == size of vfs inode i_blocks * 8
1770 */ 1770 */
@@ -2021,13 +2021,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2021 if (has_huge_files) { 2021 if (has_huge_files) {
2022 /* 2022 /*
2023 * Large file size enabled file system can only be 2023 * Large file size enabled file system can only be
2024 * mount if kernel is build with CONFIG_LSF 2024 * mount if kernel is build with CONFIG_LBD
2025 */ 2025 */
2026 if (sizeof(root->i_blocks) < sizeof(u64) && 2026 if (sizeof(root->i_blocks) < sizeof(u64) &&
2027 !(sb->s_flags & MS_RDONLY)) { 2027 !(sb->s_flags & MS_RDONLY)) {
2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " 2028 printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2029 "files cannot be mounted read-write " 2029 "files cannot be mounted read-write "
2030 "without CONFIG_LSF.\n", sb->s_id); 2030 "without CONFIG_LBD.\n", sb->s_id);
2031 goto failed_mount; 2031 goto failed_mount;
2032 } 2032 }
2033 } 2033 }
diff --git a/include/linux/aio.h b/include/linux/aio.h
index f6b8cf99b596..b16a957030f8 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -5,6 +5,7 @@
5#include <linux/workqueue.h> 5#include <linux/workqueue.h>
6#include <linux/aio_abi.h> 6#include <linux/aio_abi.h>
7#include <linux/uio.h> 7#include <linux/uio.h>
8#include <linux/rcupdate.h>
8 9
9#include <asm/atomic.h> 10#include <asm/atomic.h>
10 11
@@ -183,7 +184,7 @@ struct kioctx {
183 184
184 /* This needs improving */ 185 /* This needs improving */
185 unsigned long user_id; 186 unsigned long user_id;
186 struct kioctx *next; 187 struct hlist_node list;
187 188
188 wait_queue_head_t wait; 189 wait_queue_head_t wait;
189 190
@@ -199,6 +200,8 @@ struct kioctx {
199 struct aio_ring_info ring_info; 200 struct aio_ring_info ring_info;
200 201
201 struct delayed_work wq; 202 struct delayed_work wq;
203
204 struct rcu_head rcu_head;
202}; 205};
203 206
204/* prototypes */ 207/* prototypes */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 6a642098e5c3..18462c5b8fff 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -90,10 +90,11 @@ struct bio {
90 90
91 unsigned int bi_comp_cpu; /* completion CPU */ 91 unsigned int bi_comp_cpu; /* completion CPU */
92 92
93 atomic_t bi_cnt; /* pin count */
94
93 struct bio_vec *bi_io_vec; /* the actual vec list */ 95 struct bio_vec *bi_io_vec; /* the actual vec list */
94 96
95 bio_end_io_t *bi_end_io; 97 bio_end_io_t *bi_end_io;
96 atomic_t bi_cnt; /* pin count */
97 98
98 void *bi_private; 99 void *bi_private;
99#if defined(CONFIG_BLK_DEV_INTEGRITY) 100#if defined(CONFIG_BLK_DEV_INTEGRITY)
@@ -101,6 +102,13 @@ struct bio {
101#endif 102#endif
102 103
103 bio_destructor_t *bi_destructor; /* destructor */ 104 bio_destructor_t *bi_destructor; /* destructor */
105
106 /*
107 * We can inline a number of vecs at the end of the bio, to avoid
108 * double allocations for a small number of bio_vecs. This member
109 * MUST obviously be kept at the very end of the bio.
110 */
111 struct bio_vec bi_inline_vecs[0];
104}; 112};
105 113
106/* 114/*
@@ -117,6 +125,7 @@ struct bio {
117#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ 125#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */
118#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ 126#define BIO_NULL_MAPPED 9 /* contains invalid user pages */
119#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ 127#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */
128#define BIO_QUIET 11 /* Make BIO Quiet */
120#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 129#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
121 130
122/* 131/*
@@ -211,6 +220,11 @@ static inline void *bio_data(struct bio *bio)
211 return NULL; 220 return NULL;
212} 221}
213 222
223static inline int bio_has_allocated_vec(struct bio *bio)
224{
225 return bio->bi_io_vec && bio->bi_io_vec != bio->bi_inline_vecs;
226}
227
214/* 228/*
215 * will die 229 * will die
216 */ 230 */
@@ -332,7 +346,7 @@ struct bio_pair {
332extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); 346extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
333extern void bio_pair_release(struct bio_pair *dbio); 347extern void bio_pair_release(struct bio_pair *dbio);
334 348
335extern struct bio_set *bioset_create(int, int); 349extern struct bio_set *bioset_create(unsigned int, unsigned int);
336extern void bioset_free(struct bio_set *); 350extern void bioset_free(struct bio_set *);
337 351
338extern struct bio *bio_alloc(gfp_t, int); 352extern struct bio *bio_alloc(gfp_t, int);
@@ -377,6 +391,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
377extern int bio_uncopy_user(struct bio *); 391extern int bio_uncopy_user(struct bio *);
378void zero_fill_bio(struct bio *bio); 392void zero_fill_bio(struct bio *bio);
379extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); 393extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *);
394extern void bvec_free_bs(struct bio_set *, struct bio_vec *, unsigned int);
380extern unsigned int bvec_nr_vecs(unsigned short idx); 395extern unsigned int bvec_nr_vecs(unsigned short idx);
381 396
382/* 397/*
@@ -395,13 +410,17 @@ static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu)
395 */ 410 */
396#define BIO_POOL_SIZE 2 411#define BIO_POOL_SIZE 2
397#define BIOVEC_NR_POOLS 6 412#define BIOVEC_NR_POOLS 6
413#define BIOVEC_MAX_IDX (BIOVEC_NR_POOLS - 1)
398 414
399struct bio_set { 415struct bio_set {
416 struct kmem_cache *bio_slab;
417 unsigned int front_pad;
418
400 mempool_t *bio_pool; 419 mempool_t *bio_pool;
401#if defined(CONFIG_BLK_DEV_INTEGRITY) 420#if defined(CONFIG_BLK_DEV_INTEGRITY)
402 mempool_t *bio_integrity_pool; 421 mempool_t *bio_integrity_pool;
403#endif 422#endif
404 mempool_t *bvec_pools[BIOVEC_NR_POOLS]; 423 mempool_t *bvec_pool;
405}; 424};
406 425
407struct biovec_slab { 426struct biovec_slab {
@@ -411,6 +430,7 @@ struct biovec_slab {
411}; 430};
412 431
413extern struct bio_set *fs_bio_set; 432extern struct bio_set *fs_bio_set;
433extern struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly;
414 434
415/* 435/*
416 * a small number of entries is fine, not going to be performance critical. 436 * a small number of entries is fine, not going to be performance critical.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 031a315c0509..7035cec583b6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -26,7 +26,6 @@ struct scsi_ioctl_command;
26 26
27struct request_queue; 27struct request_queue;
28struct elevator_queue; 28struct elevator_queue;
29typedef struct elevator_queue elevator_t;
30struct request_pm_state; 29struct request_pm_state;
31struct blk_trace; 30struct blk_trace;
32struct request; 31struct request;
@@ -313,7 +312,7 @@ struct request_queue
313 */ 312 */
314 struct list_head queue_head; 313 struct list_head queue_head;
315 struct request *last_merge; 314 struct request *last_merge;
316 elevator_t *elevator; 315 struct elevator_queue *elevator;
317 316
318 /* 317 /*
319 * the queue request freelist, one for reads and one for writes 318 * the queue request freelist, one for reads and one for writes
@@ -449,6 +448,7 @@ struct request_queue
449#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ 448#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */
450#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ 449#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */
451#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 450#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
451#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
452 452
453static inline int queue_is_locked(struct request_queue *q) 453static inline int queue_is_locked(struct request_queue *q)
454{ 454{
@@ -522,22 +522,32 @@ enum {
522 * TAG_FLUSH : ordering by tag w/ pre and post flushes 522 * TAG_FLUSH : ordering by tag w/ pre and post flushes
523 * TAG_FUA : ordering by tag w/ pre flush and FUA write 523 * TAG_FUA : ordering by tag w/ pre flush and FUA write
524 */ 524 */
525 QUEUE_ORDERED_NONE = 0x00, 525 QUEUE_ORDERED_BY_DRAIN = 0x01,
526 QUEUE_ORDERED_DRAIN = 0x01, 526 QUEUE_ORDERED_BY_TAG = 0x02,
527 QUEUE_ORDERED_TAG = 0x02, 527 QUEUE_ORDERED_DO_PREFLUSH = 0x10,
528 528 QUEUE_ORDERED_DO_BAR = 0x20,
529 QUEUE_ORDERED_PREFLUSH = 0x10, 529 QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
530 QUEUE_ORDERED_POSTFLUSH = 0x20, 530 QUEUE_ORDERED_DO_FUA = 0x80,
531 QUEUE_ORDERED_FUA = 0x40, 531
532 532 QUEUE_ORDERED_NONE = 0x00,
533 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | 533
534 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 534 QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
535 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | 535 QUEUE_ORDERED_DO_BAR,
536 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 536 QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
537 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | 537 QUEUE_ORDERED_DO_PREFLUSH |
538 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, 538 QUEUE_ORDERED_DO_POSTFLUSH,
539 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | 539 QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
540 QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, 540 QUEUE_ORDERED_DO_PREFLUSH |
541 QUEUE_ORDERED_DO_FUA,
542
543 QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
544 QUEUE_ORDERED_DO_BAR,
545 QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
546 QUEUE_ORDERED_DO_PREFLUSH |
547 QUEUE_ORDERED_DO_POSTFLUSH,
548 QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
549 QUEUE_ORDERED_DO_PREFLUSH |
550 QUEUE_ORDERED_DO_FUA,
541 551
542 /* 552 /*
543 * Ordered operation sequence 553 * Ordered operation sequence
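These QUEUE_ORDERED_* values are still handed to blk_queue_ordered() by the driver; a hedged sketch for a device with a volatile write cache (q and my_prepare_flush are assumed names):

	static void my_prepare_flush(struct request_queue *q, struct request *rq)
	{
		/* fill rq with the device-specific cache-flush command here */
	}

	/* drain the queue around barriers, flushing the cache before and after */
	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, my_prepare_flush);
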
@@ -585,7 +595,6 @@ enum {
585#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) 595#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA)
586#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) 596#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD)
587#define blk_bidi_rq(rq) ((rq)->next_rq != NULL) 597#define blk_bidi_rq(rq) ((rq)->next_rq != NULL)
588#define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors)
589/* rq->queuelist of dequeued request must be list_empty() */ 598/* rq->queuelist of dequeued request must be list_empty() */
590#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) 599#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist))
591 600
@@ -855,10 +864,10 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
855extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); 864extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
856extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); 865extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
857extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); 866extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
858extern int blk_do_ordered(struct request_queue *, struct request **); 867extern bool blk_do_ordered(struct request_queue *, struct request **);
859extern unsigned blk_ordered_cur_seq(struct request_queue *); 868extern unsigned blk_ordered_cur_seq(struct request_queue *);
860extern unsigned blk_ordered_req_seq(struct request *); 869extern unsigned blk_ordered_req_seq(struct request *);
861extern void blk_ordered_complete_seq(struct request_queue *, unsigned, int); 870extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
862 871
863extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); 872extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
864extern void blk_dump_rq_flags(struct request *, char *); 873extern void blk_dump_rq_flags(struct request *, char *);
@@ -977,7 +986,6 @@ static inline void put_dev_sector(Sector p)
977 986
978struct work_struct; 987struct work_struct;
979int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); 988int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
980void kblockd_flush_work(struct work_struct *work);
981 989
982#define MODULE_ALIAS_BLOCKDEV(major,minor) \ 990#define MODULE_ALIAS_BLOCKDEV(major,minor) \
983 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) 991 MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 3ce64b90118c..8605f8a74df9 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -35,6 +35,7 @@ enum bh_state_bits {
35 BH_Ordered, /* ordered write */ 35 BH_Ordered, /* ordered write */
36 BH_Eopnotsupp, /* operation not supported (barrier) */ 36 BH_Eopnotsupp, /* operation not supported (barrier) */
37 BH_Unwritten, /* Buffer is allocated on disk but not written */ 37 BH_Unwritten, /* Buffer is allocated on disk but not written */
 38 BH_Quiet, /* Buffer error printks to be quiet */
38 39
39 BH_PrivateStart,/* not a state bit, but the first bit available 40 BH_PrivateStart,/* not a state bit, but the first bit available
40 * for private allocation by other entities 41 * for private allocation by other entities
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 92f6f634e3e6..7a204256b155 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -28,7 +28,7 @@ typedef void (elevator_activate_req_fn) (struct request_queue *, struct request
28typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); 28typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *);
29 29
30typedef void *(elevator_init_fn) (struct request_queue *); 30typedef void *(elevator_init_fn) (struct request_queue *);
31typedef void (elevator_exit_fn) (elevator_t *); 31typedef void (elevator_exit_fn) (struct elevator_queue *);
32 32
33struct elevator_ops 33struct elevator_ops
34{ 34{
@@ -62,8 +62,8 @@ struct elevator_ops
62 62
63struct elv_fs_entry { 63struct elv_fs_entry {
64 struct attribute attr; 64 struct attribute attr;
65 ssize_t (*show)(elevator_t *, char *); 65 ssize_t (*show)(struct elevator_queue *, char *);
66 ssize_t (*store)(elevator_t *, const char *, size_t); 66 ssize_t (*store)(struct elevator_queue *, const char *, size_t);
67}; 67};
68 68
69/* 69/*
@@ -130,7 +130,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *);
130extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); 130extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
131 131
132extern int elevator_init(struct request_queue *, char *); 132extern int elevator_init(struct request_queue *, char *);
133extern void elevator_exit(elevator_t *); 133extern void elevator_exit(struct elevator_queue *);
134extern int elv_rq_merge_ok(struct request *, struct bio *); 134extern int elv_rq_merge_ok(struct request *, struct bio *);
135 135
136/* 136/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 3df7742ce246..16948eaecae3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -126,6 +126,7 @@ struct blk_scsi_cmd_filter {
126struct disk_part_tbl { 126struct disk_part_tbl {
127 struct rcu_head rcu_head; 127 struct rcu_head rcu_head;
128 int len; 128 int len;
129 struct hd_struct *last_lookup;
129 struct hd_struct *part[]; 130 struct hd_struct *part[];
130}; 131};
131 132
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fe825471d5aa..9cfc9b627fdd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -232,8 +232,9 @@ struct mm_struct {
232 struct core_state *core_state; /* coredumping support */ 232 struct core_state *core_state; /* coredumping support */
233 233
234 /* aio bits */ 234 /* aio bits */
235 rwlock_t ioctx_list_lock; /* aio lock */ 235 spinlock_t ioctx_lock;
236 struct kioctx *ioctx_list; 236 struct hlist_head ioctx_list;
237
237#ifdef CONFIG_MM_OWNER 238#ifdef CONFIG_MM_OWNER
238 /* 239 /*
239 * "owner" points to a task that is regarded as the canonical 240 * "owner" points to a task that is regarded as the canonical
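With the ioctx list converted to an RCU-protected hlist, readers no longer take the per-mm lock. A sketch of the read side, mirroring what lookup_ioctx() in fs/aio.c does after this change (mm and ctx_id are assumed, error handling trimmed):

	struct kioctx *ctx, *ret = NULL;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(ctx, n, &mm->ioctx_list, list) {
		if (ctx->user_id == ctx_id && !ctx->dead) {
			get_ioctx(ctx);		/* pin before leaving the RCU section */
			ret = ctx;
			break;
		}
	}
	rcu_read_unlock();
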
diff --git a/include/linux/types.h b/include/linux/types.h
index 1d98330b1f2c..121f349cb7ec 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -135,19 +135,14 @@ typedef __s64 int64_t;
135 * 135 *
136 * Linux always considers sectors to be 512 bytes long independently 136 * Linux always considers sectors to be 512 bytes long independently
137 * of the devices real block size. 137 * of the devices real block size.
138 *
139 * blkcnt_t is the type of the inode's block count.
138 */ 140 */
139#ifdef CONFIG_LBD 141#ifdef CONFIG_LBD
140typedef u64 sector_t; 142typedef u64 sector_t;
141#else
142typedef unsigned long sector_t;
143#endif
144
145/*
146 * The type of the inode's block count.
147 */
148#ifdef CONFIG_LSF
149typedef u64 blkcnt_t; 143typedef u64 blkcnt_t;
150#else 144#else
145typedef unsigned long sector_t;
151typedef unsigned long blkcnt_t; 146typedef unsigned long blkcnt_t;
152#endif 147#endif
153 148
diff --git a/kernel/exit.c b/kernel/exit.c
index a946221879d7..c9e5a1c14e08 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1037,8 +1037,6 @@ NORET_TYPE void do_exit(long code)
1037 * task into the wait for ever nirwana as well. 1037 * task into the wait for ever nirwana as well.
1038 */ 1038 */
1039 tsk->flags |= PF_EXITPIDONE; 1039 tsk->flags |= PF_EXITPIDONE;
1040 if (tsk->io_context)
1041 exit_io_context();
1042 set_current_state(TASK_UNINTERRUPTIBLE); 1040 set_current_state(TASK_UNINTERRUPTIBLE);
1043 schedule(); 1041 schedule();
1044 } 1042 }
diff --git a/kernel/fork.c b/kernel/fork.c
index 6144b36cd897..43cbf30669e6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -415,8 +415,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
415 set_mm_counter(mm, file_rss, 0); 415 set_mm_counter(mm, file_rss, 0);
416 set_mm_counter(mm, anon_rss, 0); 416 set_mm_counter(mm, anon_rss, 0);
417 spin_lock_init(&mm->page_table_lock); 417 spin_lock_init(&mm->page_table_lock);
418 rwlock_init(&mm->ioctx_list_lock); 418 spin_lock_init(&mm->ioctx_lock);
419 mm->ioctx_list = NULL; 419 INIT_HLIST_HEAD(&mm->ioctx_list);
420 mm->free_area_cache = TASK_UNMAPPED_BASE; 420 mm->free_area_cache = TASK_UNMAPPED_BASE;
421 mm->cached_hole_size = ~0UL; 421 mm->cached_hole_size = ~0UL;
422 mm_init_owner(mm, p); 422 mm_init_owner(mm, p);
diff --git a/mm/bounce.c b/mm/bounce.c
index bf0cf7c8387b..e590272fe7a8 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -198,8 +198,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
198 /* 198 /*
199 * irk, bounce it 199 * irk, bounce it
200 */ 200 */
201 if (!bio) 201 if (!bio) {
202 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); 202 unsigned int cnt = (*bio_orig)->bi_vcnt;
203
204 bio = bio_alloc(GFP_NOIO, cnt);
205 memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
206 }
207
203 208
204 to = bio->bi_io_vec + i; 209 to = bio->bi_io_vec + i;
205 210