aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-08-19 13:47:07 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-08-19 13:47:07 -0400
commit5ccc38740a283aba81a00e92941310d0c1aeb2ee (patch)
treeba7d725947975a9391e085bd1d5958b004bfdc3e
parent0c3bef612881ee6216a36952ffaabfc35b83545c (diff)
parentb53d1ed734a2b9af8da115b836b658daa7d47a48 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
* 'for-linus' of git://git.kernel.dk/linux-block: (23 commits) Revert "cfq: Remove special treatment for metadata rqs." block: fix flush machinery for stacking drivers with differring flush flags block: improve rq_affinity placement blktrace: add FLUSH/FUA support Move some REQ flags to the common bio/request area allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH xen/blkback: Make description more obvious. cfq-iosched: Add documentation about idling block: Make rq_affinity = 1 work as expected block: swim3: fix unterminated of_device_id table block/genhd.c: remove useless cast in diskstats_show() drivers/cdrom/cdrom.c: relax check on dvd manufacturer value drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse bsg-lib: add module.h include cfq-iosched: Reduce linked group count upon group destruction blk-throttle: correctly determine sync bio loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices loop: add management interface for on-demand device allocation loop: replace linked list of allocated devices with an idr index ...
-rw-r--r--Documentation/block/cfq-iosched.txt71
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--block/Kconfig10
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-core.c8
-rw-r--r--block/blk-flush.c25
-rw-r--r--block/blk-softirq.c8
-rw-r--r--block/blk-throttle.c4
-rw-r--r--block/blk.h2
-rw-r--r--block/bsg-lib.c298
-rw-r--r--block/cfq-iosched.c21
-rw-r--r--block/genhd.c8
-rw-r--r--drivers/block/Kconfig17
-rw-r--r--drivers/block/drbd/drbd_nl.c4
-rw-r--r--drivers/block/loop.c297
-rw-r--r--drivers/block/swim3.c1
-rw-r--r--drivers/block/xen-blkfront.c6
-rw-r--r--drivers/cdrom/cdrom.c8
-rw-r--r--include/linux/blk_types.h7
-rw-r--r--include/linux/blkdev.h5
-rw-r--r--include/linux/blktrace_api.h5
-rw-r--r--include/linux/bsg-lib.h73
-rw-r--r--include/linux/loop.h5
-rw-r--r--include/linux/miscdevice.h1
-rw-r--r--include/trace/events/block.h20
-rw-r--r--kernel/trace/blktrace.c21
26 files changed, 800 insertions, 135 deletions
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
index e578feed6d8..6d670f57045 100644
--- a/Documentation/block/cfq-iosched.txt
+++ b/Documentation/block/cfq-iosched.txt
@@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
43to IOPS mode and starts providing fairness in terms of number of requests 43to IOPS mode and starts providing fairness in terms of number of requests
44dispatched. Note that this mode switching takes effect only for group 44dispatched. Note that this mode switching takes effect only for group
45scheduling. For non-cgroup users nothing should change. 45scheduling. For non-cgroup users nothing should change.
46
47CFQ IO scheduler Idling Theory
48===============================
49Idling on a queue is primarily about waiting for the next request to come
50on same queue after completion of a request. In this process CFQ will not
51dispatch requests from other cfq queues even if requests are pending there.
52
53The rationale behind idling is that it can cut down on number of seeks
54on rotational media. For example, if a process is doing dependent
55sequential reads (next read will come only after completion of previous
56one), then not dispatching request from other queue should help as we
57did not move the disk head and kept on dispatching sequential IO from
58one queue.
59
60CFQ has following service trees and various queues are put on these trees.
61
62 sync-idle sync-noidle async
63
64All cfq queues doing synchronous sequential IO go on to sync-idle tree.
65On this tree we idle on each queue individually.
66
67All synchronous non-sequential queues go on sync-noidle tree. Also any
68requests which are marked with REQ_NOIDLE go on this service tree. On this
69tree we do not idle on individual queues; instead we idle on the whole group
70of queues or the tree. So if there are 4 queues waiting for IO to dispatch
71we will idle only once last queue has dispatched the IO and there is
72no more IO on this service tree.
73
74All async writes go on async service tree. There is no idling on async
75queues.
76
77CFQ has some optimizations for SSDs and if it detects a non-rotational
78media which can support higher queue depth (multiple requests in
79flight at a time), then it cuts down on idling of individual queues and
80all the queues move to sync-noidle tree and only tree idle remains. This
81tree idling provides isolation with buffered write queues on async tree.
82
83FAQ
84===
85Q1. Why idle at all on queues marked with REQ_NOIDLE?
86
87A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
88 with REQ_NOIDLE. This helps in providing isolation with all the sync-idle
89 queues. Otherwise in presence of many sequential readers, other
90 synchronous IO might not get fair share of disk.
91
92 For example, if there are 10 sequential readers doing IO and they get
93 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
94 roughly after 1 second. If after completion of REQ_NOIDLE request we
95 do not idle, and after a couple of milliseconds another REQ_NOIDLE
96 request comes in, again it will be scheduled after 1 second. Repeat it
97 and notice how a workload can lose its disk share and suffer due to
98 multiple sequential readers.
99
100 fsync can generate dependent IO where bunch of data is written in the
101 context of fsync, and later some journaling data is written. Journaling
102 data comes in only after fsync has finished its IO (at least for ext4
103 that seemed to be the case). Now if one decides not to idle on fsync
104 thread due to REQ_NOIDLE, then next journaling write will not get
105 scheduled for another second. A process doing small fsync, will suffer
106 badly in presence of multiple sequential readers.
107
108 Hence doing tree idling on threads using REQ_NOIDLE flag on requests
109 provides isolation from multiple sequential readers and at the same
110 time we do not idle on individual threads.
111
112Q2. When to specify REQ_NOIDLE
113A2. I would think whenever one is doing synchronous write and not expecting
114 more writes to be dispatched from same context soon, should be able
115 to specify REQ_NOIDLE on writes and that probably should work well for
116 most of the cases.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6ca1f5cb71e..614d0382e2c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1350,9 +1350,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1350 it is equivalent to "nosmp", which also disables 1350 it is equivalent to "nosmp", which also disables
1351 the IO APIC. 1351 the IO APIC.
1352 1352
1353 max_loop= [LOOP] Maximum number of loopback devices that can 1353 max_loop= [LOOP] The number of loop block devices that get
1354 be mounted 1354 (loop.max_loop) unconditionally pre-created at init time. The default
1355 Format: <1-256> 1355 number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead
1356 of statically allocating a predefined number, loop
1357 devices can be requested on-demand with the
1358 /dev/loop-control interface.
1356 1359
1357 mcatest= [IA-64] 1360 mcatest= [IA-64]
1358 1361
diff --git a/block/Kconfig b/block/Kconfig
index 60be1e0455d..e97934eecec 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -65,6 +65,16 @@ config BLK_DEV_BSG
65 65
66 If unsure, say Y. 66 If unsure, say Y.
67 67
68config BLK_DEV_BSGLIB
69 bool "Block layer SG support v4 helper lib"
70 default n
71 select BLK_DEV_BSG
72 help
73 Subsystems will normally enable this if needed. Users will not
74 normally need to manually enable this.
75
76 If unsure, say N.
77
68config BLK_DEV_INTEGRITY 78config BLK_DEV_INTEGRITY
69 bool "Block layer data integrity support" 79 bool "Block layer data integrity support"
70 ---help--- 80 ---help---
diff --git a/block/Makefile b/block/Makefile
index 0fec4b3fab5..514c6e4f427 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
11obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o 12obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
12obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o 13obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
13obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 14obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-core.c b/block/blk-core.c
index b627558c461..90e1ffdeb41 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1702,6 +1702,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits);
1702int blk_insert_cloned_request(struct request_queue *q, struct request *rq) 1702int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1703{ 1703{
1704 unsigned long flags; 1704 unsigned long flags;
1705 int where = ELEVATOR_INSERT_BACK;
1705 1706
1706 if (blk_rq_check_limits(q, rq)) 1707 if (blk_rq_check_limits(q, rq))
1707 return -EIO; 1708 return -EIO;
@@ -1718,7 +1719,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1718 */ 1719 */
1719 BUG_ON(blk_queued_rq(rq)); 1720 BUG_ON(blk_queued_rq(rq));
1720 1721
1721 add_acct_request(q, rq, ELEVATOR_INSERT_BACK); 1722 if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
1723 where = ELEVATOR_INSERT_FLUSH;
1724
1725 add_acct_request(q, rq, where);
1722 spin_unlock_irqrestore(q->queue_lock, flags); 1726 spin_unlock_irqrestore(q->queue_lock, flags);
1723 1727
1724 return 0; 1728 return 0;
@@ -2275,7 +2279,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
2275 * %false - we are done with this request 2279 * %false - we are done with this request
2276 * %true - still buffers pending for this request 2280 * %true - still buffers pending for this request
2277 **/ 2281 **/
2278static bool __blk_end_bidi_request(struct request *rq, int error, 2282bool __blk_end_bidi_request(struct request *rq, int error,
2279 unsigned int nr_bytes, unsigned int bidi_bytes) 2283 unsigned int nr_bytes, unsigned int bidi_bytes)
2280{ 2284{
2281 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) 2285 if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
diff --git a/block/blk-flush.c b/block/blk-flush.c
index bb21e4c36f7..491eb30a242 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
95{ 95{
96 unsigned int policy = 0; 96 unsigned int policy = 0;
97 97
98 if (blk_rq_sectors(rq))
99 policy |= REQ_FSEQ_DATA;
100
98 if (fflags & REQ_FLUSH) { 101 if (fflags & REQ_FLUSH) {
99 if (rq->cmd_flags & REQ_FLUSH) 102 if (rq->cmd_flags & REQ_FLUSH)
100 policy |= REQ_FSEQ_PREFLUSH; 103 policy |= REQ_FSEQ_PREFLUSH;
101 if (blk_rq_sectors(rq))
102 policy |= REQ_FSEQ_DATA;
103 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) 104 if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
104 policy |= REQ_FSEQ_POSTFLUSH; 105 policy |= REQ_FSEQ_POSTFLUSH;
105 } 106 }
@@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq)
122 123
123 /* make @rq a normal request */ 124 /* make @rq a normal request */
124 rq->cmd_flags &= ~REQ_FLUSH_SEQ; 125 rq->cmd_flags &= ~REQ_FLUSH_SEQ;
125 rq->end_io = NULL; 126 rq->end_io = rq->flush.saved_end_io;
126} 127}
127 128
128/** 129/**
@@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq)
300 unsigned int fflags = q->flush_flags; /* may change, cache */ 301 unsigned int fflags = q->flush_flags; /* may change, cache */
301 unsigned int policy = blk_flush_policy(fflags, rq); 302 unsigned int policy = blk_flush_policy(fflags, rq);
302 303
303 BUG_ON(rq->end_io);
304 BUG_ON(!rq->bio || rq->bio != rq->biotail);
305
306 /* 304 /*
307 * @policy now records what operations need to be done. Adjust 305 * @policy now records what operations need to be done. Adjust
308 * REQ_FLUSH and FUA for the driver. 306 * REQ_FLUSH and FUA for the driver.
@@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq)
312 rq->cmd_flags &= ~REQ_FUA; 310 rq->cmd_flags &= ~REQ_FUA;
313 311
314 /* 312 /*
313 * An empty flush handed down from a stacking driver may
314 * translate into nothing if the underlying device does not
315 * advertise a write-back cache. In this case, simply
316 * complete the request.
317 */
318 if (!policy) {
319 __blk_end_bidi_request(rq, 0, 0, 0);
320 return;
321 }
322
323 BUG_ON(!rq->bio || rq->bio != rq->biotail);
324
325 /*
315 * If there's data but flush is not necessary, the request can be 326 * If there's data but flush is not necessary, the request can be
316 * processed directly without going through flush machinery. Queue 327 * processed directly without going through flush machinery. Queue
317 * for normal execution. 328 * for normal execution.
@@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq)
319 if ((policy & REQ_FSEQ_DATA) && 330 if ((policy & REQ_FSEQ_DATA) &&
320 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { 331 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
321 list_add_tail(&rq->queuelist, &q->queue_head); 332 list_add_tail(&rq->queuelist, &q->queue_head);
333 blk_run_queue_async(q);
322 return; 334 return;
323 } 335 }
324 336
@@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq)
329 memset(&rq->flush, 0, sizeof(rq->flush)); 341 memset(&rq->flush, 0, sizeof(rq->flush));
330 INIT_LIST_HEAD(&rq->flush.list); 342 INIT_LIST_HEAD(&rq->flush.list);
331 rq->cmd_flags |= REQ_FLUSH_SEQ; 343 rq->cmd_flags |= REQ_FLUSH_SEQ;
344 rq->flush.saved_end_io = rq->end_io; /* Usually NULL */
332 rq->end_io = flush_data_end_io; 345 rq->end_io = flush_data_end_io;
333 346
334 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); 347 blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 475fab809a8..58340d0cb23 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req)
124 } else 124 } else
125 ccpu = cpu; 125 ccpu = cpu;
126 126
127 /*
128 * If current CPU and requested CPU are in the same group, running
129 * softirq in current CPU. One might concern this is just like
130 * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is
131 * running in interrupt handler, and currently I/O controller doesn't
132 * support multiple interrupts, so current CPU is unique actually. This
133 * avoids IPI sending from current CPU to the first CPU of a group.
134 */
127 if (ccpu == cpu || ccpu == group_cpu) { 135 if (ccpu == cpu || ccpu == group_cpu) {
128 struct list_head *list; 136 struct list_head *list;
129do_local: 137do_local:
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a79412050..a19f58c6fc3 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) 746static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
747{ 747{
748 bool rw = bio_data_dir(bio); 748 bool rw = bio_data_dir(bio);
749 bool sync = bio->bi_rw & REQ_SYNC; 749 bool sync = rw_is_sync(bio->bi_rw);
750 750
751 /* Charge the bio to the group */ 751 /* Charge the bio to the group */
752 tg->bytes_disp[rw] += bio->bi_size; 752 tg->bytes_disp[rw] += bio->bi_size;
@@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop)
1150 1150
1151 if (tg_no_rule_group(tg, rw)) { 1151 if (tg_no_rule_group(tg, rw)) {
1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, 1152 blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size,
1153 rw, bio->bi_rw & REQ_SYNC); 1153 rw, rw_is_sync(bio->bi_rw));
1154 rcu_read_unlock(); 1154 rcu_read_unlock();
1155 return 0; 1155 return 0;
1156 } 1156 }
diff --git a/block/blk.h b/block/blk.h
index d6586287adc..20b900a377c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
17 struct bio *bio); 17 struct bio *bio);
18void blk_dequeue_request(struct request *rq); 18void blk_dequeue_request(struct request *rq);
19void __blk_queue_free_tags(struct request_queue *q); 19void __blk_queue_free_tags(struct request_queue *q);
20bool __blk_end_bidi_request(struct request *rq, int error,
21 unsigned int nr_bytes, unsigned int bidi_bytes);
20 22
21void blk_rq_timed_out_timer(unsigned long data); 23void blk_rq_timed_out_timer(unsigned long data);
22void blk_delete_timer(struct request *); 24void blk_delete_timer(struct request *);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
new file mode 100644
index 00000000000..6690e6e4103
--- /dev/null
+++ b/block/bsg-lib.c
@@ -0,0 +1,298 @@
1/*
2 * BSG helper library
3 *
4 * Copyright (C) 2008 James Smart, Emulex Corporation
5 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
6 * Copyright (C) 2011 Mike Christie
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23#include <linux/slab.h>
24#include <linux/blkdev.h>
25#include <linux/delay.h>
26#include <linux/scatterlist.h>
27#include <linux/bsg-lib.h>
28#include <linux/module.h>
29#include <scsi/scsi_cmnd.h>
30
31/**
32 * bsg_destroy_job - routine to teardown/delete a bsg job
33 * @job: bsg_job that is to be torn down
34 */
35static void bsg_destroy_job(struct bsg_job *job)
36{
37 put_device(job->dev); /* release reference for the request */
38
39 kfree(job->request_payload.sg_list);
40 kfree(job->reply_payload.sg_list);
41 kfree(job);
42}
43
44/**
45 * bsg_job_done - completion routine for bsg requests
46 * @job: bsg_job that is complete
47 * @result: job reply result
48 * @reply_payload_rcv_len: length of payload recvd
49 *
50 * The LLD should call this when the bsg job has completed.
51 */
52void bsg_job_done(struct bsg_job *job, int result,
53 unsigned int reply_payload_rcv_len)
54{
55 struct request *req = job->req;
56 struct request *rsp = req->next_rq;
57 int err;
58
59 err = job->req->errors = result;
60 if (err < 0)
61 /* we're only returning the result field in the reply */
62 job->req->sense_len = sizeof(u32);
63 else
64 job->req->sense_len = job->reply_len;
65 /* we assume all request payload was transferred, residual == 0 */
66 req->resid_len = 0;
67
68 if (rsp) {
69 WARN_ON(reply_payload_rcv_len > rsp->resid_len);
70
71 /* set reply (bidi) residual */
72 rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len);
73 }
74 blk_complete_request(req);
75}
76EXPORT_SYMBOL_GPL(bsg_job_done);
77
78/**
79 * bsg_softirq_done - softirq done routine for destroying the bsg requests
80 * @rq: BSG request that holds the job to be destroyed
81 */
82static void bsg_softirq_done(struct request *rq)
83{
84 struct bsg_job *job = rq->special;
85
86 blk_end_request_all(rq, rq->errors);
87 bsg_destroy_job(job);
88}
89
90static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
91{
92 size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments);
93
94 BUG_ON(!req->nr_phys_segments);
95
96 buf->sg_list = kzalloc(sz, GFP_KERNEL);
97 if (!buf->sg_list)
98 return -ENOMEM;
99 sg_init_table(buf->sg_list, req->nr_phys_segments);
100 buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list);
101 buf->payload_len = blk_rq_bytes(req);
102 return 0;
103}
104
105/**
106 * bsg_create_job - create the bsg_job structure for the bsg request
107 * @dev: device that is being sent the bsg request
108 * @req: BSG request that needs a job structure
109 */
110static int bsg_create_job(struct device *dev, struct request *req)
111{
112 struct request *rsp = req->next_rq;
113 struct request_queue *q = req->q;
114 struct bsg_job *job;
115 int ret;
116
117 BUG_ON(req->special);
118
119 job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
120 if (!job)
121 return -ENOMEM;
122
123 req->special = job;
124 job->req = req;
125 if (q->bsg_job_size)
126 job->dd_data = (void *)&job[1];
127 job->request = req->cmd;
128 job->request_len = req->cmd_len;
129 job->reply = req->sense;
130 job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
131 * allocated */
132 if (req->bio) {
133 ret = bsg_map_buffer(&job->request_payload, req);
134 if (ret)
135 goto failjob_rls_job;
136 }
137 if (rsp && rsp->bio) {
138 ret = bsg_map_buffer(&job->reply_payload, rsp);
139 if (ret)
140 goto failjob_rls_rqst_payload;
141 }
142 job->dev = dev;
143 /* take a reference for the request */
144 get_device(job->dev);
145 return 0;
146
147failjob_rls_rqst_payload:
148 kfree(job->request_payload.sg_list);
149failjob_rls_job:
150 kfree(job);
151 return -ENOMEM;
152}
153
154/*
155 * bsg_goose_queue - restart queue in case it was stopped
156 * @q: request q to be restarted
157 */
158void bsg_goose_queue(struct request_queue *q)
159{
160 if (!q)
161 return;
162
163 blk_run_queue_async(q);
164}
165EXPORT_SYMBOL_GPL(bsg_goose_queue);
166
167/**
168 * bsg_request_fn - generic handler for bsg requests
169 * @q: request queue to manage
170 *
171 * On error the create_bsg_job function should return a -Exyz error value
172 * that will be set to the req->errors.
173 *
174 * Drivers/subsys should pass this to the queue init function.
175 */
176void bsg_request_fn(struct request_queue *q)
177{
178 struct device *dev = q->queuedata;
179 struct request *req;
180 struct bsg_job *job;
181 int ret;
182
183 if (!get_device(dev))
184 return;
185
186 while (1) {
187 req = blk_fetch_request(q);
188 if (!req)
189 break;
190 spin_unlock_irq(q->queue_lock);
191
192 ret = bsg_create_job(dev, req);
193 if (ret) {
194 req->errors = ret;
195 blk_end_request_all(req, ret);
196 spin_lock_irq(q->queue_lock);
197 continue;
198 }
199
200 job = req->special;
201 ret = q->bsg_job_fn(job);
202 spin_lock_irq(q->queue_lock);
203 if (ret)
204 break;
205 }
206
207 spin_unlock_irq(q->queue_lock);
208 put_device(dev);
209 spin_lock_irq(q->queue_lock);
210}
211EXPORT_SYMBOL_GPL(bsg_request_fn);
212
213/**
214 * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
215 * @dev: device to attach bsg device to
216 * @q: request queue setup by caller
217 * @name: device to give bsg device
218 * @job_fn: bsg job handler
219 * @dd_job_size: size of LLD data needed for each job
220 *
221 * The caller should have set up the request queue with bsg_request_fn
222 * as the request_fn.
223 */
224int bsg_setup_queue(struct device *dev, struct request_queue *q,
225 char *name, bsg_job_fn *job_fn, int dd_job_size)
226{
227 int ret;
228
229 q->queuedata = dev;
230 q->bsg_job_size = dd_job_size;
231 q->bsg_job_fn = job_fn;
232 queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
233 blk_queue_softirq_done(q, bsg_softirq_done);
234 blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
235
236 ret = bsg_register_queue(q, dev, name, NULL);
237 if (ret) {
238 printk(KERN_ERR "%s: bsg interface failed to "
239 "initialize - register queue\n", dev->kobj.name);
240 return ret;
241 }
242
243 return 0;
244}
245EXPORT_SYMBOL_GPL(bsg_setup_queue);
246
247/**
248 * bsg_remove_queue - Deletes the bsg dev from the q
249 * @q: the request_queue that is to be torn down.
250 *
251 * Notes:
252 * Before unregistering the queue empty any requests that are blocked
253 */
254void bsg_remove_queue(struct request_queue *q)
255{
256 struct request *req; /* block request */
257 int counts; /* totals for request_list count and starved */
258
259 if (!q)
260 return;
261
262 /* Stop taking in new requests */
263 spin_lock_irq(q->queue_lock);
264 blk_stop_queue(q);
265
266 /* drain all requests in the queue */
267 while (1) {
268 /* need the lock to fetch a request
269 * this may fetch the same request as the previous pass
270 */
271 req = blk_fetch_request(q);
272 /* save requests in use and starved */
273 counts = q->rq.count[0] + q->rq.count[1] +
274 q->rq.starved[0] + q->rq.starved[1];
275 spin_unlock_irq(q->queue_lock);
276 /* any requests still outstanding? */
277 if (counts == 0)
278 break;
279
280 /* This may be the same req as the previous iteration,
281 * always send the blk_end_request_all after a prefetch.
282 * It is not okay to not end the request because the
283 * prefetch started the request.
284 */
285 if (req) {
286 /* return -ENXIO to indicate that this queue is
287 * going away
288 */
289 req->errors = -ENXIO;
290 blk_end_request_all(req, -ENXIO);
291 }
292
293 msleep(200); /* allow bsg to possibly finish */
294 spin_lock_irq(q->queue_lock);
295 }
296 bsg_unregister_queue(q);
297}
298EXPORT_SYMBOL_GPL(bsg_remove_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1f96ad6254f..a33bd4377c6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -130,6 +130,8 @@ struct cfq_queue {
130 unsigned long slice_end; 130 unsigned long slice_end;
131 long slice_resid; 131 long slice_resid;
132 132
133 /* pending metadata requests */
134 int meta_pending;
133 /* number of requests that are on the dispatch list or inside driver */ 135 /* number of requests that are on the dispatch list or inside driver */
134 int dispatched; 136 int dispatched;
135 137
@@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2,
682 if (rq_is_sync(rq1) != rq_is_sync(rq2)) 684 if (rq_is_sync(rq1) != rq_is_sync(rq2))
683 return rq_is_sync(rq1) ? rq1 : rq2; 685 return rq_is_sync(rq1) ? rq1 : rq2;
684 686
687 if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META)
688 return rq1->cmd_flags & REQ_META ? rq1 : rq2;
689
685 s1 = blk_rq_pos(rq1); 690 s1 = blk_rq_pos(rq1);
686 s2 = blk_rq_pos(rq2); 691 s2 = blk_rq_pos(rq2);
687 692
@@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1209 1214
1210 hlist_del_init(&cfqg->cfqd_node); 1215 hlist_del_init(&cfqg->cfqd_node);
1211 1216
1217 BUG_ON(cfqd->nr_blkcg_linked_grps <= 0);
1218 cfqd->nr_blkcg_linked_grps--;
1219
1212 /* 1220 /*
1213 * Put the reference taken at the time of creation so that when all 1221 * Put the reference taken at the time of creation so that when all
1214 * queues are gone, group can be destroyed. 1222 * queues are gone, group can be destroyed.
@@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq)
1604 cfqq->cfqd->rq_queued--; 1612 cfqq->cfqd->rq_queued--;
1605 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, 1613 cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
1606 rq_data_dir(rq), rq_is_sync(rq)); 1614 rq_data_dir(rq), rq_is_sync(rq));
1615 if (rq->cmd_flags & REQ_META) {
1616 WARN_ON(!cfqq->meta_pending);
1617 cfqq->meta_pending--;
1618 }
1607} 1619}
1608 1620
1609static int cfq_merge(struct request_queue *q, struct request **req, 1621static int cfq_merge(struct request_queue *q, struct request **req,
@@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
3357 return true; 3369 return true;
3358 3370
3359 /* 3371 /*
3372 * So both queues are sync. Let the new request get disk time if
3373 * it's a metadata request and the current queue is doing regular IO.
3374 */
3375 if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
3376 return true;
3377
3378 /*
3360 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. 3379 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
3361 */ 3380 */
3362 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) 3381 if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
@@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
3420 struct cfq_io_context *cic = RQ_CIC(rq); 3439 struct cfq_io_context *cic = RQ_CIC(rq);
3421 3440
3422 cfqd->rq_queued++; 3441 cfqd->rq_queued++;
3442 if (rq->cmd_flags & REQ_META)
3443 cfqq->meta_pending++;
3423 3444
3424 cfq_update_io_thinktime(cfqd, cfqq, cic); 3445 cfq_update_io_thinktime(cfqd, cfqq, cic);
3425 cfq_update_io_seektime(cfqd, cfqq, rq); 3446 cfq_update_io_seektime(cfqd, cfqq, rq);
diff --git a/block/genhd.c b/block/genhd.c
index 5cb51c55f6d..e2f67902dd0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1146 cpu = part_stat_lock(); 1146 cpu = part_stat_lock();
1147 part_round_stats(cpu, hd); 1147 part_round_stats(cpu, hd);
1148 part_stat_unlock(); 1148 part_stat_unlock();
1149 seq_printf(seqf, "%4d %7d %s %lu %lu %llu " 1149 seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
1150 "%u %lu %lu %llu %u %u %u %u\n", 1150 "%u %lu %lu %lu %u %u %u %u\n",
1151 MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1151 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1152 disk_name(gp, hd->partno, buf), 1152 disk_name(gp, hd->partno, buf),
1153 part_stat_read(hd, ios[READ]), 1153 part_stat_read(hd, ios[READ]),
1154 part_stat_read(hd, merges[READ]), 1154 part_stat_read(hd, merges[READ]),
1155 (unsigned long long)part_stat_read(hd, sectors[READ]), 1155 part_stat_read(hd, sectors[READ]),
1156 jiffies_to_msecs(part_stat_read(hd, ticks[READ])), 1156 jiffies_to_msecs(part_stat_read(hd, ticks[READ])),
1157 part_stat_read(hd, ios[WRITE]), 1157 part_stat_read(hd, ios[WRITE]),
1158 part_stat_read(hd, merges[WRITE]), 1158 part_stat_read(hd, merges[WRITE]),
1159 (unsigned long long)part_stat_read(hd, sectors[WRITE]), 1159 part_stat_read(hd, sectors[WRITE]),
1160 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), 1160 jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])),
1161 part_in_flight(hd), 1161 part_in_flight(hd),
1162 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1162 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 717d6e4e18d..6f07ec1c2f5 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -256,6 +256,21 @@ config BLK_DEV_LOOP
256 256
257 Most users will answer N here. 257 Most users will answer N here.
258 258
259config BLK_DEV_LOOP_MIN_COUNT
260 int "Number of loop devices to pre-create at init time"
261 depends on BLK_DEV_LOOP
262 default 8
263 help
264 Static number of loop devices to be unconditionally pre-created
265 at init time.
266
267 This default value can be overwritten on the kernel command
268 line or with module-parameter loop.max_loop.
269
270 The historic default is 8. If a late 2011 version of losetup(8)
271 is used, it can be set to 0, since needed loop devices can be
272 dynamically allocated with the /dev/loop-control interface.
273
259config BLK_DEV_CRYPTOLOOP 274config BLK_DEV_CRYPTOLOOP
260 tristate "Cryptoloop Support" 275 tristate "Cryptoloop Support"
261 select CRYPTO 276 select CRYPTO
@@ -471,7 +486,7 @@ config XEN_BLKDEV_FRONTEND
471 in another domain which drives the actual block device. 486 in another domain which drives the actual block device.
472 487
473config XEN_BLKDEV_BACKEND 488config XEN_BLKDEV_BACKEND
474 tristate "Block-device backend driver" 489 tristate "Xen block-device backend driver"
475 depends on XEN_BACKEND 490 depends on XEN_BACKEND
476 help 491 help
477 The block-device backend driver allows the kernel to export its 492 The block-device backend driver allows the kernel to export its
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 515bcd948a4..0feab261e29 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
1829 1829
1830 /* silently ignore cpu mask on UP kernel */ 1830 /* silently ignore cpu mask on UP kernel */
1831 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { 1831 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
1832 err = __bitmap_parse(sc.cpu_mask, 32, 0, 1832 err = bitmap_parse(sc.cpu_mask, 32,
1833 cpumask_bits(new_cpu_mask), nr_cpu_ids); 1833 cpumask_bits(new_cpu_mask), nr_cpu_ids);
1834 if (err) { 1834 if (err) {
1835 dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); 1835 dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
1836 retcode = ERR_CPU_MASK_PARSE; 1836 retcode = ERR_CPU_MASK_PARSE;
1837 goto fail; 1837 goto fail;
1838 } 1838 }
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 76c8da78212..4720c7ade0a 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,11 +75,11 @@
75#include <linux/kthread.h> 75#include <linux/kthread.h>
76#include <linux/splice.h> 76#include <linux/splice.h>
77#include <linux/sysfs.h> 77#include <linux/sysfs.h>
78 78#include <linux/miscdevice.h>
79#include <asm/uaccess.h> 79#include <asm/uaccess.h>
80 80
81static LIST_HEAD(loop_devices); 81static DEFINE_IDR(loop_index_idr);
82static DEFINE_MUTEX(loop_devices_mutex); 82static DEFINE_MUTEX(loop_index_mutex);
83 83
84static int max_part; 84static int max_part;
85static int part_shift; 85static int part_shift;
@@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file)
722static ssize_t loop_attr_show(struct device *dev, char *page, 722static ssize_t loop_attr_show(struct device *dev, char *page,
723 ssize_t (*callback)(struct loop_device *, char *)) 723 ssize_t (*callback)(struct loop_device *, char *))
724{ 724{
725 struct loop_device *l, *lo = NULL; 725 struct gendisk *disk = dev_to_disk(dev);
726 726 struct loop_device *lo = disk->private_data;
727 mutex_lock(&loop_devices_mutex);
728 list_for_each_entry(l, &loop_devices, lo_list)
729 if (disk_to_dev(l->lo_disk) == dev) {
730 lo = l;
731 break;
732 }
733 mutex_unlock(&loop_devices_mutex);
734 727
735 return lo ? callback(lo, page) : -EIO; 728 return callback(lo, page);
736} 729}
737 730
738#define LOOP_ATTR_RO(_name) \ 731#define LOOP_ATTR_RO(_name) \
@@ -750,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
750 ssize_t ret; 743 ssize_t ret;
751 char *p = NULL; 744 char *p = NULL;
752 745
753 mutex_lock(&lo->lo_ctl_mutex); 746 spin_lock_irq(&lo->lo_lock);
754 if (lo->lo_backing_file) 747 if (lo->lo_backing_file)
755 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); 748 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
756 mutex_unlock(&lo->lo_ctl_mutex); 749 spin_unlock_irq(&lo->lo_lock);
757 750
758 if (IS_ERR_OR_NULL(p)) 751 if (IS_ERR_OR_NULL(p))
759 ret = PTR_ERR(p); 752 ret = PTR_ERR(p);
@@ -1007,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1007 1000
1008 kthread_stop(lo->lo_thread); 1001 kthread_stop(lo->lo_thread);
1009 1002
1003 spin_lock_irq(&lo->lo_lock);
1010 lo->lo_backing_file = NULL; 1004 lo->lo_backing_file = NULL;
1005 spin_unlock_irq(&lo->lo_lock);
1011 1006
1012 loop_release_xfer(lo); 1007 loop_release_xfer(lo);
1013 lo->transfer = NULL; 1008 lo->transfer = NULL;
@@ -1485,13 +1480,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1485 1480
1486static int lo_open(struct block_device *bdev, fmode_t mode) 1481static int lo_open(struct block_device *bdev, fmode_t mode)
1487{ 1482{
1488 struct loop_device *lo = bdev->bd_disk->private_data; 1483 struct loop_device *lo;
1484 int err = 0;
1485
1486 mutex_lock(&loop_index_mutex);
1487 lo = bdev->bd_disk->private_data;
1488 if (!lo) {
1489 err = -ENXIO;
1490 goto out;
1491 }
1489 1492
1490 mutex_lock(&lo->lo_ctl_mutex); 1493 mutex_lock(&lo->lo_ctl_mutex);
1491 lo->lo_refcnt++; 1494 lo->lo_refcnt++;
1492 mutex_unlock(&lo->lo_ctl_mutex); 1495 mutex_unlock(&lo->lo_ctl_mutex);
1493 1496out:
1494 return 0; 1497 mutex_unlock(&loop_index_mutex);
1498 return err;
1495} 1499}
1496 1500
1497static int lo_release(struct gendisk *disk, fmode_t mode) 1501static int lo_release(struct gendisk *disk, fmode_t mode)
@@ -1557,40 +1561,71 @@ int loop_register_transfer(struct loop_func_table *funcs)
1557 return 0; 1561 return 0;
1558} 1562}
1559 1563
1564static int unregister_transfer_cb(int id, void *ptr, void *data)
1565{
1566 struct loop_device *lo = ptr;
1567 struct loop_func_table *xfer = data;
1568
1569 mutex_lock(&lo->lo_ctl_mutex);
1570 if (lo->lo_encryption == xfer)
1571 loop_release_xfer(lo);
1572 mutex_unlock(&lo->lo_ctl_mutex);
1573 return 0;
1574}
1575
1560int loop_unregister_transfer(int number) 1576int loop_unregister_transfer(int number)
1561{ 1577{
1562 unsigned int n = number; 1578 unsigned int n = number;
1563 struct loop_device *lo;
1564 struct loop_func_table *xfer; 1579 struct loop_func_table *xfer;
1565 1580
1566 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) 1581 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1567 return -EINVAL; 1582 return -EINVAL;
1568 1583
1569 xfer_funcs[n] = NULL; 1584 xfer_funcs[n] = NULL;
1570 1585 idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer);
1571 list_for_each_entry(lo, &loop_devices, lo_list) {
1572 mutex_lock(&lo->lo_ctl_mutex);
1573
1574 if (lo->lo_encryption == xfer)
1575 loop_release_xfer(lo);
1576
1577 mutex_unlock(&lo->lo_ctl_mutex);
1578 }
1579
1580 return 0; 1586 return 0;
1581} 1587}
1582 1588
1583EXPORT_SYMBOL(loop_register_transfer); 1589EXPORT_SYMBOL(loop_register_transfer);
1584EXPORT_SYMBOL(loop_unregister_transfer); 1590EXPORT_SYMBOL(loop_unregister_transfer);
1585 1591
1586static struct loop_device *loop_alloc(int i) 1592static int loop_add(struct loop_device **l, int i)
1587{ 1593{
1588 struct loop_device *lo; 1594 struct loop_device *lo;
1589 struct gendisk *disk; 1595 struct gendisk *disk;
1596 int err;
1590 1597
1591 lo = kzalloc(sizeof(*lo), GFP_KERNEL); 1598 lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1592 if (!lo) 1599 if (!lo) {
1600 err = -ENOMEM;
1593 goto out; 1601 goto out;
1602 }
1603
1604 err = idr_pre_get(&loop_index_idr, GFP_KERNEL);
1605 if (err < 0)
1606 goto out_free_dev;
1607
1608 if (i >= 0) {
1609 int m;
1610
1611 /* create specific i in the index */
1612 err = idr_get_new_above(&loop_index_idr, lo, i, &m);
1613 if (err >= 0 && i != m) {
1614 idr_remove(&loop_index_idr, m);
1615 err = -EEXIST;
1616 }
1617 } else if (i == -1) {
1618 int m;
1619
1620 /* get next free nr */
1621 err = idr_get_new(&loop_index_idr, lo, &m);
1622 if (err >= 0)
1623 i = m;
1624 } else {
1625 err = -EINVAL;
1626 }
1627 if (err < 0)
1628 goto out_free_dev;
1594 1629
1595 lo->lo_queue = blk_alloc_queue(GFP_KERNEL); 1630 lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1596 if (!lo->lo_queue) 1631 if (!lo->lo_queue)
@@ -1611,81 +1646,158 @@ static struct loop_device *loop_alloc(int i)
1611 disk->private_data = lo; 1646 disk->private_data = lo;
1612 disk->queue = lo->lo_queue; 1647 disk->queue = lo->lo_queue;
1613 sprintf(disk->disk_name, "loop%d", i); 1648 sprintf(disk->disk_name, "loop%d", i);
1614 return lo; 1649 add_disk(disk);
1650 *l = lo;
1651 return lo->lo_number;
1615 1652
1616out_free_queue: 1653out_free_queue:
1617 blk_cleanup_queue(lo->lo_queue); 1654 blk_cleanup_queue(lo->lo_queue);
1618out_free_dev: 1655out_free_dev:
1619 kfree(lo); 1656 kfree(lo);
1620out: 1657out:
1621 return NULL; 1658 return err;
1622} 1659}
1623 1660
1624static void loop_free(struct loop_device *lo) 1661static void loop_remove(struct loop_device *lo)
1625{ 1662{
1663 del_gendisk(lo->lo_disk);
1626 blk_cleanup_queue(lo->lo_queue); 1664 blk_cleanup_queue(lo->lo_queue);
1627 put_disk(lo->lo_disk); 1665 put_disk(lo->lo_disk);
1628 list_del(&lo->lo_list);
1629 kfree(lo); 1666 kfree(lo);
1630} 1667}
1631 1668
1632static struct loop_device *loop_init_one(int i) 1669static int find_free_cb(int id, void *ptr, void *data)
1670{
1671 struct loop_device *lo = ptr;
1672 struct loop_device **l = data;
1673
1674 if (lo->lo_state == Lo_unbound) {
1675 *l = lo;
1676 return 1;
1677 }
1678 return 0;
1679}
1680
1681static int loop_lookup(struct loop_device **l, int i)
1633{ 1682{
1634 struct loop_device *lo; 1683 struct loop_device *lo;
1684 int ret = -ENODEV;
1635 1685
1636 list_for_each_entry(lo, &loop_devices, lo_list) { 1686 if (i < 0) {
1637 if (lo->lo_number == i) 1687 int err;
1638 return lo; 1688
1689 err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
1690 if (err == 1) {
1691 *l = lo;
1692 ret = lo->lo_number;
1693 }
1694 goto out;
1639 } 1695 }
1640 1696
1641 lo = loop_alloc(i); 1697 /* lookup and return a specific i */
1698 lo = idr_find(&loop_index_idr, i);
1642 if (lo) { 1699 if (lo) {
1643 add_disk(lo->lo_disk); 1700 *l = lo;
1644 list_add_tail(&lo->lo_list, &loop_devices); 1701 ret = lo->lo_number;
1645 } 1702 }
1646 return lo; 1703out:
1647} 1704 return ret;
1648
1649static void loop_del_one(struct loop_device *lo)
1650{
1651 del_gendisk(lo->lo_disk);
1652 loop_free(lo);
1653} 1705}
1654 1706
1655static struct kobject *loop_probe(dev_t dev, int *part, void *data) 1707static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1656{ 1708{
1657 struct loop_device *lo; 1709 struct loop_device *lo;
1658 struct kobject *kobj; 1710 struct kobject *kobj;
1711 int err;
1659 1712
1660 mutex_lock(&loop_devices_mutex); 1713 mutex_lock(&loop_index_mutex);
1661 lo = loop_init_one(MINOR(dev) >> part_shift); 1714 err = loop_lookup(&lo, MINOR(dev) >> part_shift);
1662 kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); 1715 if (err < 0)
1663 mutex_unlock(&loop_devices_mutex); 1716 err = loop_add(&lo, MINOR(dev) >> part_shift);
1717 if (err < 0)
1718 kobj = ERR_PTR(err);
1719 else
1720 kobj = get_disk(lo->lo_disk);
1721 mutex_unlock(&loop_index_mutex);
1664 1722
1665 *part = 0; 1723 *part = 0;
1666 return kobj; 1724 return kobj;
1667} 1725}
1668 1726
1727static long loop_control_ioctl(struct file *file, unsigned int cmd,
1728 unsigned long parm)
1729{
1730 struct loop_device *lo;
1731 int ret = -ENOSYS;
1732
1733 mutex_lock(&loop_index_mutex);
1734 switch (cmd) {
1735 case LOOP_CTL_ADD:
1736 ret = loop_lookup(&lo, parm);
1737 if (ret >= 0) {
1738 ret = -EEXIST;
1739 break;
1740 }
1741 ret = loop_add(&lo, parm);
1742 break;
1743 case LOOP_CTL_REMOVE:
1744 ret = loop_lookup(&lo, parm);
1745 if (ret < 0)
1746 break;
1747 mutex_lock(&lo->lo_ctl_mutex);
1748 if (lo->lo_state != Lo_unbound) {
1749 ret = -EBUSY;
1750 mutex_unlock(&lo->lo_ctl_mutex);
1751 break;
1752 }
1753 if (lo->lo_refcnt > 0) {
1754 ret = -EBUSY;
1755 mutex_unlock(&lo->lo_ctl_mutex);
1756 break;
1757 }
1758 lo->lo_disk->private_data = NULL;
1759 mutex_unlock(&lo->lo_ctl_mutex);
1760 idr_remove(&loop_index_idr, lo->lo_number);
1761 loop_remove(lo);
1762 break;
1763 case LOOP_CTL_GET_FREE:
1764 ret = loop_lookup(&lo, -1);
1765 if (ret >= 0)
1766 break;
1767 ret = loop_add(&lo, -1);
1768 }
1769 mutex_unlock(&loop_index_mutex);
1770
1771 return ret;
1772}
1773
1774static const struct file_operations loop_ctl_fops = {
1775 .open = nonseekable_open,
1776 .unlocked_ioctl = loop_control_ioctl,
1777 .compat_ioctl = loop_control_ioctl,
1778 .owner = THIS_MODULE,
1779 .llseek = noop_llseek,
1780};
1781
1782static struct miscdevice loop_misc = {
1783 .minor = LOOP_CTRL_MINOR,
1784 .name = "loop-control",
1785 .fops = &loop_ctl_fops,
1786};
1787
1788MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR);
1789MODULE_ALIAS("devname:loop-control");
1790
1669static int __init loop_init(void) 1791static int __init loop_init(void)
1670{ 1792{
1671 int i, nr; 1793 int i, nr;
1672 unsigned long range; 1794 unsigned long range;
1673 struct loop_device *lo, *next; 1795 struct loop_device *lo;
1796 int err;
1674 1797
1675 /* 1798 err = misc_register(&loop_misc);
1676 * loop module now has a feature to instantiate underlying device 1799 if (err < 0)
1677 * structure on-demand, provided that there is an access dev node. 1800 return err;
1678 * However, this will not work well with user space tool that doesn't
1679 * know about such "feature". In order to not break any existing
1680 * tool, we do the following:
1681 *
1682 * (1) if max_loop is specified, create that many upfront, and this
1683 * also becomes a hard limit.
1684 * (2) if max_loop is not specified, create 8 loop device on module
1685 * load, user can further extend loop device by create dev node
1686 * themselves and have kernel automatically instantiate actual
1687 * device on-demand.
1688 */
1689 1801
1690 part_shift = 0; 1802 part_shift = 0;
1691 if (max_part > 0) { 1803 if (max_part > 0) {
@@ -1708,57 +1820,60 @@ static int __init loop_init(void)
1708 if (max_loop > 1UL << (MINORBITS - part_shift)) 1820 if (max_loop > 1UL << (MINORBITS - part_shift))
1709 return -EINVAL; 1821 return -EINVAL;
1710 1822
1823 /*
1824 * If max_loop is specified, create that many devices upfront.
1825 * This also becomes a hard limit. If max_loop is not specified,
1826 * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
1827 * init time. Loop devices can be requested on-demand with the
1828 * /dev/loop-control interface, or be instantiated by accessing
1829 * a 'dead' device node.
1830 */
1711 if (max_loop) { 1831 if (max_loop) {
1712 nr = max_loop; 1832 nr = max_loop;
1713 range = max_loop << part_shift; 1833 range = max_loop << part_shift;
1714 } else { 1834 } else {
1715 nr = 8; 1835 nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
1716 range = 1UL << MINORBITS; 1836 range = 1UL << MINORBITS;
1717 } 1837 }
1718 1838
1719 if (register_blkdev(LOOP_MAJOR, "loop")) 1839 if (register_blkdev(LOOP_MAJOR, "loop"))
1720 return -EIO; 1840 return -EIO;
1721 1841
1722 for (i = 0; i < nr; i++) {
1723 lo = loop_alloc(i);
1724 if (!lo)
1725 goto Enomem;
1726 list_add_tail(&lo->lo_list, &loop_devices);
1727 }
1728
1729 /* point of no return */
1730
1731 list_for_each_entry(lo, &loop_devices, lo_list)
1732 add_disk(lo->lo_disk);
1733
1734 blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 1842 blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1735 THIS_MODULE, loop_probe, NULL, NULL); 1843 THIS_MODULE, loop_probe, NULL, NULL);
1736 1844
1845 /* pre-create number of devices given by config or max_loop */
1846 mutex_lock(&loop_index_mutex);
1847 for (i = 0; i < nr; i++)
1848 loop_add(&lo, i);
1849 mutex_unlock(&loop_index_mutex);
1850
1737 printk(KERN_INFO "loop: module loaded\n"); 1851 printk(KERN_INFO "loop: module loaded\n");
1738 return 0; 1852 return 0;
1853}
1739 1854
1740Enomem: 1855static int loop_exit_cb(int id, void *ptr, void *data)
1741 printk(KERN_INFO "loop: out of memory\n"); 1856{
1742 1857 struct loop_device *lo = ptr;
1743 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1744 loop_free(lo);
1745 1858
1746 unregister_blkdev(LOOP_MAJOR, "loop"); 1859 loop_remove(lo);
1747 return -ENOMEM; 1860 return 0;
1748} 1861}
1749 1862
1750static void __exit loop_exit(void) 1863static void __exit loop_exit(void)
1751{ 1864{
1752 unsigned long range; 1865 unsigned long range;
1753 struct loop_device *lo, *next;
1754 1866
1755 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 1867 range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
1756 1868
1757 list_for_each_entry_safe(lo, next, &loop_devices, lo_list) 1869 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
1758 loop_del_one(lo); 1870 idr_remove_all(&loop_index_idr);
1871 idr_destroy(&loop_index_idr);
1759 1872
1760 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 1873 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1761 unregister_blkdev(LOOP_MAJOR, "loop"); 1874 unregister_blkdev(LOOP_MAJOR, "loop");
1875
1876 misc_deregister(&loop_misc);
1762} 1877}
1763 1878
1764module_init(loop_init); 1879module_init(loop_init);
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 773bfa79277..ae3e167e17a 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] =
1184 { 1184 {
1185 .compatible = "swim3" 1185 .compatible = "swim3"
1186 }, 1186 },
1187 { /* end of list */ }
1187}; 1188};
1188 1189
1189static struct macio_driver swim3_driver = 1190static struct macio_driver swim3_driver =
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index b536a9cef91..9ea8c2576c7 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock);
123#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) 123#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
124#define EMULATED_HD_DISK_MINOR_OFFSET (0) 124#define EMULATED_HD_DISK_MINOR_OFFSET (0)
125#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) 125#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
126#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) 126#define EMULATED_SD_DISK_MINOR_OFFSET (0)
127#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) 127#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)
128 128
129#define DEV_NAME "xvd" /* name in /dev */ 129#define DEV_NAME "xvd" /* name in /dev */
130 130
@@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
529 minor = BLKIF_MINOR_EXT(info->vdevice); 529 minor = BLKIF_MINOR_EXT(info->vdevice);
530 nr_parts = PARTS_PER_EXT_DISK; 530 nr_parts = PARTS_PER_EXT_DISK;
531 offset = minor / nr_parts; 531 offset = minor / nr_parts;
532 if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) 532 if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
533 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " 533 printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
534 "emulated IDE disks,\n\t choose an xvd device name" 534 "emulated IDE disks,\n\t choose an xvd device name"
535 "from xvde on\n", info->vdevice); 535 "from xvde on\n", info->vdevice);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 75fb965b8f7..f997c27d79e 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s,
1929 goto out; 1929 goto out;
1930 1930
1931 s->manufact.len = buf[0] << 8 | buf[1]; 1931 s->manufact.len = buf[0] << 8 | buf[1];
1932 if (s->manufact.len < 0 || s->manufact.len > 2048) { 1932 if (s->manufact.len < 0) {
1933 cdinfo(CD_WARNING, "Received invalid manufacture info length" 1933 cdinfo(CD_WARNING, "Received invalid manufacture info length"
1934 " (%d)\n", s->manufact.len); 1934 " (%d)\n", s->manufact.len);
1935 ret = -EIO; 1935 ret = -EIO;
1936 } else { 1936 } else {
1937 if (s->manufact.len > 2048) {
1938 cdinfo(CD_WARNING, "Received invalid manufacture info "
1939 "length (%d): truncating to 2048\n",
1940 s->manufact.len);
1941 s->manufact.len = 2048;
1942 }
1937 memcpy(s->manufact.value, &buf[4], s->manufact.len); 1943 memcpy(s->manufact.value, &buf[4], s->manufact.len);
1938 } 1944 }
1939 1945
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6395692b2e7..32f0076e844 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -125,7 +125,11 @@ enum rq_flag_bits {
125 __REQ_SYNC, /* request is sync (sync write or read) */ 125 __REQ_SYNC, /* request is sync (sync write or read) */
126 __REQ_META, /* metadata io request */ 126 __REQ_META, /* metadata io request */
127 __REQ_DISCARD, /* request to discard sectors */ 127 __REQ_DISCARD, /* request to discard sectors */
128 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
129
128 __REQ_NOIDLE, /* don't anticipate more IO after this one */ 130 __REQ_NOIDLE, /* don't anticipate more IO after this one */
131 __REQ_FUA, /* forced unit access */
132 __REQ_FLUSH, /* request for cache flush */
129 133
130 /* bio only flags */ 134 /* bio only flags */
131 __REQ_RAHEAD, /* read ahead, can fail anytime */ 135 __REQ_RAHEAD, /* read ahead, can fail anytime */
@@ -135,7 +139,6 @@ enum rq_flag_bits {
135 /* request only flags */ 139 /* request only flags */
136 __REQ_SORTED, /* elevator knows about this request */ 140 __REQ_SORTED, /* elevator knows about this request */
137 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 141 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
138 __REQ_FUA, /* forced unit access */
139 __REQ_NOMERGE, /* don't touch this for merging */ 142 __REQ_NOMERGE, /* don't touch this for merging */
140 __REQ_STARTED, /* drive already may have started this one */ 143 __REQ_STARTED, /* drive already may have started this one */
141 __REQ_DONTPREP, /* don't call prep for this one */ 144 __REQ_DONTPREP, /* don't call prep for this one */
@@ -146,11 +149,9 @@ enum rq_flag_bits {
146 __REQ_PREEMPT, /* set for "ide_preempt" requests */ 149 __REQ_PREEMPT, /* set for "ide_preempt" requests */
147 __REQ_ALLOCED, /* request came from our alloc pool */ 150 __REQ_ALLOCED, /* request came from our alloc pool */
148 __REQ_COPY_USER, /* contains copies of user pages */ 151 __REQ_COPY_USER, /* contains copies of user pages */
149 __REQ_FLUSH, /* request for cache flush */
150 __REQ_FLUSH_SEQ, /* request for flush sequence */ 152 __REQ_FLUSH_SEQ, /* request for flush sequence */
151 __REQ_IO_STAT, /* account I/O stat */ 153 __REQ_IO_STAT, /* account I/O stat */
152 __REQ_MIXED_MERGE, /* merge of different types, fail separately */ 154 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
153 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
154 __REQ_NR_BITS, /* stops here */ 155 __REQ_NR_BITS, /* stops here */
155}; 156};
156 157
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e67c45b3bc..84b15d54f8c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,6 +30,7 @@ struct request_pm_state;
30struct blk_trace; 30struct blk_trace;
31struct request; 31struct request;
32struct sg_io_hdr; 32struct sg_io_hdr;
33struct bsg_job;
33 34
34#define BLKDEV_MIN_RQ 4 35#define BLKDEV_MIN_RQ 4
35#define BLKDEV_MAX_RQ 128 /* Default maximum */ 36#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -117,6 +118,7 @@ struct request {
117 struct { 118 struct {
118 unsigned int seq; 119 unsigned int seq;
119 struct list_head list; 120 struct list_head list;
121 rq_end_io_fn *saved_end_io;
120 } flush; 122 } flush;
121 }; 123 };
122 124
@@ -209,6 +211,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
209typedef void (softirq_done_fn)(struct request *); 211typedef void (softirq_done_fn)(struct request *);
210typedef int (dma_drain_needed_fn)(struct request *); 212typedef int (dma_drain_needed_fn)(struct request *);
211typedef int (lld_busy_fn) (struct request_queue *q); 213typedef int (lld_busy_fn) (struct request_queue *q);
214typedef int (bsg_job_fn) (struct bsg_job *);
212 215
213enum blk_eh_timer_return { 216enum blk_eh_timer_return {
214 BLK_EH_NOT_HANDLED, 217 BLK_EH_NOT_HANDLED,
@@ -375,6 +378,8 @@ struct request_queue {
375 struct mutex sysfs_lock; 378 struct mutex sysfs_lock;
376 379
377#if defined(CONFIG_BLK_DEV_BSG) 380#if defined(CONFIG_BLK_DEV_BSG)
381 bsg_job_fn *bsg_job_fn;
382 int bsg_job_size;
378 struct bsg_class_device bsg_dev; 383 struct bsg_class_device bsg_dev;
379#endif 384#endif
380 385
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 8c7c2de7631..8e9e4bc6d73 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,7 +14,7 @@
14enum blktrace_cat { 14enum blktrace_cat {
15 BLK_TC_READ = 1 << 0, /* reads */ 15 BLK_TC_READ = 1 << 0, /* reads */
16 BLK_TC_WRITE = 1 << 1, /* writes */ 16 BLK_TC_WRITE = 1 << 1, /* writes */
17 BLK_TC_BARRIER = 1 << 2, /* barrier */ 17 BLK_TC_FLUSH = 1 << 2, /* flush */
18 BLK_TC_SYNC = 1 << 3, /* sync IO */ 18 BLK_TC_SYNC = 1 << 3, /* sync IO */
19 BLK_TC_SYNCIO = BLK_TC_SYNC, 19 BLK_TC_SYNCIO = BLK_TC_SYNC,
20 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ 20 BLK_TC_QUEUE = 1 << 4, /* queueing/merging */
@@ -28,8 +28,9 @@ enum blktrace_cat {
28 BLK_TC_META = 1 << 12, /* metadata */ 28 BLK_TC_META = 1 << 12, /* metadata */
29 BLK_TC_DISCARD = 1 << 13, /* discard requests */ 29 BLK_TC_DISCARD = 1 << 13, /* discard requests */
30 BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ 30 BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */
31 BLK_TC_FUA = 1 << 15, /* fua requests */
31 32
32 BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ 33 BLK_TC_END = 1 << 15, /* we've run out of bits! */
33}; 34};
34 35
35#define BLK_TC_SHIFT (16) 36#define BLK_TC_SHIFT (16)
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
new file mode 100644
index 00000000000..f55ab8cdc10
--- /dev/null
+++ b/include/linux/bsg-lib.h
@@ -0,0 +1,73 @@
1/*
2 * BSG helper library
3 *
4 * Copyright (C) 2008 James Smart, Emulex Corporation
5 * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
6 * Copyright (C) 2011 Mike Christie
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 */
23#ifndef _BLK_BSG_
24#define _BLK_BSG_
25
26#include <linux/blkdev.h>
27
28struct request;
29struct device;
30struct scatterlist;
31struct request_queue;
32
33struct bsg_buffer {
34 unsigned int payload_len;
35 int sg_cnt;
36 struct scatterlist *sg_list;
37};
38
39struct bsg_job {
40 struct device *dev;
41 struct request *req;
42
43 /* Transport/driver specific request/reply structs */
44 void *request;
45 void *reply;
46
47 unsigned int request_len;
48 unsigned int reply_len;
49 /*
50 * On entry : reply_len indicates the buffer size allocated for
51 * the reply.
52 *
53 * Upon completion : the message handler must set reply_len
54 * to indicates the size of the reply to be returned to the
55 * caller.
56 */
57
58 /* DMA payloads for the request/response */
59 struct bsg_buffer request_payload;
60 struct bsg_buffer reply_payload;
61
62 void *dd_data; /* Used for driver-specific storage */
63};
64
65void bsg_job_done(struct bsg_job *job, int result,
66 unsigned int reply_payload_rcv_len);
67int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name,
68 bsg_job_fn *job_fn, int dd_job_size);
69void bsg_request_fn(struct request_queue *q);
70void bsg_remove_queue(struct request_queue *q);
71void bsg_goose_queue(struct request_queue *q);
72
73#endif
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 66c194e2d9b..683d6989011 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -64,7 +64,6 @@ struct loop_device {
64 64
65 struct request_queue *lo_queue; 65 struct request_queue *lo_queue;
66 struct gendisk *lo_disk; 66 struct gendisk *lo_disk;
67 struct list_head lo_list;
68}; 67};
69 68
70#endif /* __KERNEL__ */ 69#endif /* __KERNEL__ */
@@ -161,4 +160,8 @@ int loop_unregister_transfer(int number);
161#define LOOP_CHANGE_FD 0x4C06 160#define LOOP_CHANGE_FD 0x4C06
162#define LOOP_SET_CAPACITY 0x4C07 161#define LOOP_SET_CAPACITY 0x4C07
163 162
163/* /dev/loop-control interface */
164#define LOOP_CTL_ADD 0x4C80
165#define LOOP_CTL_REMOVE 0x4C81
166#define LOOP_CTL_GET_FREE 0x4C82
164#endif 167#endif
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 18fd13028ba..c309b1ecdc1 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -40,6 +40,7 @@
40#define BTRFS_MINOR 234 40#define BTRFS_MINOR 234
41#define AUTOFS_MINOR 235 41#define AUTOFS_MINOR 235
42#define MAPPER_CTRL_MINOR 236 42#define MAPPER_CTRL_MINOR 236
43#define LOOP_CTRL_MINOR 237
43#define MISC_DYNAMIC_MINOR 255 44#define MISC_DYNAMIC_MINOR 255
44 45
45struct device; 46struct device;
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index bf366547da2..05c5e61f0a7 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -8,6 +8,8 @@
8#include <linux/blkdev.h> 8#include <linux/blkdev.h>
9#include <linux/tracepoint.h> 9#include <linux/tracepoint.h>
10 10
11#define RWBS_LEN 8
12
11DECLARE_EVENT_CLASS(block_rq_with_error, 13DECLARE_EVENT_CLASS(block_rq_with_error,
12 14
13 TP_PROTO(struct request_queue *q, struct request *rq), 15 TP_PROTO(struct request_queue *q, struct request *rq),
@@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error,
19 __field( sector_t, sector ) 21 __field( sector_t, sector )
20 __field( unsigned int, nr_sector ) 22 __field( unsigned int, nr_sector )
21 __field( int, errors ) 23 __field( int, errors )
22 __array( char, rwbs, 6 ) 24 __array( char, rwbs, RWBS_LEN )
23 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 25 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
24 ), 26 ),
25 27
@@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq,
104 __field( sector_t, sector ) 106 __field( sector_t, sector )
105 __field( unsigned int, nr_sector ) 107 __field( unsigned int, nr_sector )
106 __field( unsigned int, bytes ) 108 __field( unsigned int, bytes )
107 __array( char, rwbs, 6 ) 109 __array( char, rwbs, RWBS_LEN )
108 __array( char, comm, TASK_COMM_LEN ) 110 __array( char, comm, TASK_COMM_LEN )
109 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) 111 __dynamic_array( char, cmd, blk_cmd_buf_len(rq) )
110 ), 112 ),
@@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce,
183 __field( dev_t, dev ) 185 __field( dev_t, dev )
184 __field( sector_t, sector ) 186 __field( sector_t, sector )
185 __field( unsigned int, nr_sector ) 187 __field( unsigned int, nr_sector )
186 __array( char, rwbs, 6 ) 188 __array( char, rwbs, RWBS_LEN )
187 __array( char, comm, TASK_COMM_LEN ) 189 __array( char, comm, TASK_COMM_LEN )
188 ), 190 ),
189 191
@@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete,
222 __field( sector_t, sector ) 224 __field( sector_t, sector )
223 __field( unsigned, nr_sector ) 225 __field( unsigned, nr_sector )
224 __field( int, error ) 226 __field( int, error )
225 __array( char, rwbs, 6 ) 227 __array( char, rwbs, RWBS_LEN)
226 ), 228 ),
227 229
228 TP_fast_assign( 230 TP_fast_assign(
@@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio,
249 __field( dev_t, dev ) 251 __field( dev_t, dev )
250 __field( sector_t, sector ) 252 __field( sector_t, sector )
251 __field( unsigned int, nr_sector ) 253 __field( unsigned int, nr_sector )
252 __array( char, rwbs, 6 ) 254 __array( char, rwbs, RWBS_LEN )
253 __array( char, comm, TASK_COMM_LEN ) 255 __array( char, comm, TASK_COMM_LEN )
254 ), 256 ),
255 257
@@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq,
321 __field( dev_t, dev ) 323 __field( dev_t, dev )
322 __field( sector_t, sector ) 324 __field( sector_t, sector )
323 __field( unsigned int, nr_sector ) 325 __field( unsigned int, nr_sector )
324 __array( char, rwbs, 6 ) 326 __array( char, rwbs, RWBS_LEN )
325 __array( char, comm, TASK_COMM_LEN ) 327 __array( char, comm, TASK_COMM_LEN )
326 ), 328 ),
327 329
@@ -456,7 +458,7 @@ TRACE_EVENT(block_split,
456 __field( dev_t, dev ) 458 __field( dev_t, dev )
457 __field( sector_t, sector ) 459 __field( sector_t, sector )
458 __field( sector_t, new_sector ) 460 __field( sector_t, new_sector )
459 __array( char, rwbs, 6 ) 461 __array( char, rwbs, RWBS_LEN )
460 __array( char, comm, TASK_COMM_LEN ) 462 __array( char, comm, TASK_COMM_LEN )
461 ), 463 ),
462 464
@@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap,
498 __field( unsigned int, nr_sector ) 500 __field( unsigned int, nr_sector )
499 __field( dev_t, old_dev ) 501 __field( dev_t, old_dev )
500 __field( sector_t, old_sector ) 502 __field( sector_t, old_sector )
501 __array( char, rwbs, 6 ) 503 __array( char, rwbs, RWBS_LEN)
502 ), 504 ),
503 505
504 TP_fast_assign( 506 TP_fast_assign(
@@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap,
542 __field( unsigned int, nr_sector ) 544 __field( unsigned int, nr_sector )
543 __field( dev_t, old_dev ) 545 __field( dev_t, old_dev )
544 __field( sector_t, old_sector ) 546 __field( sector_t, old_sector )
545 __array( char, rwbs, 6 ) 547 __array( char, rwbs, RWBS_LEN)
546 ), 548 ),
547 549
548 TP_fast_assign( 550 TP_fast_assign(
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 6957aa298df..7c910a5593a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
206 what |= MASK_TC_BIT(rw, RAHEAD); 206 what |= MASK_TC_BIT(rw, RAHEAD);
207 what |= MASK_TC_BIT(rw, META); 207 what |= MASK_TC_BIT(rw, META);
208 what |= MASK_TC_BIT(rw, DISCARD); 208 what |= MASK_TC_BIT(rw, DISCARD);
209 what |= MASK_TC_BIT(rw, FLUSH);
210 what |= MASK_TC_BIT(rw, FUA);
209 211
210 pid = tsk->pid; 212 pid = tsk->pid;
211 if (act_log_check(bt, what, sector, pid)) 213 if (act_log_check(bt, what, sector, pid))
@@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1054 goto out; 1056 goto out;
1055 } 1057 }
1056 1058
1059 if (tc & BLK_TC_FLUSH)
1060 rwbs[i++] = 'F';
1061
1057 if (tc & BLK_TC_DISCARD) 1062 if (tc & BLK_TC_DISCARD)
1058 rwbs[i++] = 'D'; 1063 rwbs[i++] = 'D';
1059 else if (tc & BLK_TC_WRITE) 1064 else if (tc & BLK_TC_WRITE)
@@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t)
1063 else 1068 else
1064 rwbs[i++] = 'N'; 1069 rwbs[i++] = 'N';
1065 1070
1071 if (tc & BLK_TC_FUA)
1072 rwbs[i++] = 'F';
1066 if (tc & BLK_TC_AHEAD) 1073 if (tc & BLK_TC_AHEAD)
1067 rwbs[i++] = 'A'; 1074 rwbs[i++] = 'A';
1068 if (tc & BLK_TC_BARRIER)
1069 rwbs[i++] = 'B';
1070 if (tc & BLK_TC_SYNC) 1075 if (tc & BLK_TC_SYNC)
1071 rwbs[i++] = 'S'; 1076 rwbs[i++] = 'S';
1072 if (tc & BLK_TC_META) 1077 if (tc & BLK_TC_META)
@@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
1132 1137
1133static int blk_log_action_classic(struct trace_iterator *iter, const char *act) 1138static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1134{ 1139{
1135 char rwbs[6]; 1140 char rwbs[RWBS_LEN];
1136 unsigned long long ts = iter->ts; 1141 unsigned long long ts = iter->ts;
1137 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); 1142 unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC);
1138 unsigned secs = (unsigned long)ts; 1143 unsigned secs = (unsigned long)ts;
@@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act)
1148 1153
1149static int blk_log_action(struct trace_iterator *iter, const char *act) 1154static int blk_log_action(struct trace_iterator *iter, const char *act)
1150{ 1155{
1151 char rwbs[6]; 1156 char rwbs[RWBS_LEN];
1152 const struct blk_io_trace *t = te_blk_io_trace(iter->ent); 1157 const struct blk_io_trace *t = te_blk_io_trace(iter->ent);
1153 1158
1154 fill_rwbs(rwbs, t); 1159 fill_rwbs(rwbs, t);
@@ -1561,7 +1566,7 @@ static const struct {
1561} mask_maps[] = { 1566} mask_maps[] = {
1562 { BLK_TC_READ, "read" }, 1567 { BLK_TC_READ, "read" },
1563 { BLK_TC_WRITE, "write" }, 1568 { BLK_TC_WRITE, "write" },
1564 { BLK_TC_BARRIER, "barrier" }, 1569 { BLK_TC_FLUSH, "flush" },
1565 { BLK_TC_SYNC, "sync" }, 1570 { BLK_TC_SYNC, "sync" },
1566 { BLK_TC_QUEUE, "queue" }, 1571 { BLK_TC_QUEUE, "queue" },
1567 { BLK_TC_REQUEUE, "requeue" }, 1572 { BLK_TC_REQUEUE, "requeue" },
@@ -1573,6 +1578,7 @@ static const struct {
1573 { BLK_TC_META, "meta" }, 1578 { BLK_TC_META, "meta" },
1574 { BLK_TC_DISCARD, "discard" }, 1579 { BLK_TC_DISCARD, "discard" },
1575 { BLK_TC_DRV_DATA, "drv_data" }, 1580 { BLK_TC_DRV_DATA, "drv_data" },
1581 { BLK_TC_FUA, "fua" },
1576}; 1582};
1577 1583
1578static int blk_trace_str2mask(const char *str) 1584static int blk_trace_str2mask(const char *str)
@@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1788{ 1794{
1789 int i = 0; 1795 int i = 0;
1790 1796
1797 if (rw & REQ_FLUSH)
1798 rwbs[i++] = 'F';
1799
1791 if (rw & WRITE) 1800 if (rw & WRITE)
1792 rwbs[i++] = 'W'; 1801 rwbs[i++] = 'W';
1793 else if (rw & REQ_DISCARD) 1802 else if (rw & REQ_DISCARD)
@@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1797 else 1806 else
1798 rwbs[i++] = 'N'; 1807 rwbs[i++] = 'N';
1799 1808
1809 if (rw & REQ_FUA)
1810 rwbs[i++] = 'F';
1800 if (rw & REQ_RAHEAD) 1811 if (rw & REQ_RAHEAD)
1801 rwbs[i++] = 'A'; 1812 rwbs[i++] = 'A';
1802 if (rw & REQ_SYNC) 1813 if (rw & REQ_SYNC)