path: root/drivers/block
author    Linus Torvalds <torvalds@linux-foundation.org>  2013-11-13 22:08:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-11-13 22:08:14 -0500
commit    0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch)
tree      177c4cb22ece78b18f64f548ae82b9a15edbb99c /drivers/block
parent    2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff)
parent    e37459b8e2c7db6735e39e019e448b76e5e77647 (diff)
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
 "This is the pull request for the core changes in the block layer for
  3.13. It contains:

   - The new blk-mq request interface. This is a new and more scalable
     queueing model that marries the best part of the request based
     interface we currently have (which is fully featured, but scales
     poorly) and the bio based "interface" which the new drivers for
     high IOPS devices end up using because it's much faster than the
     request based one.

     The bio interface has no block layer support, since it taps into
     the stack much earlier. This means that drivers end up having to
     implement a lot of functionality on their own, like tagging,
     timeout handling, requeue, etc. The blk-mq interface provides all
     these. Some drivers even provide a switch to select bio or rq and
     have code to handle both, since things like merging only work in
     the rq model and hence are faster for some workloads. This is a
     huge mess. Conversion of these drivers nets us a substantial code
     reduction. Initial results on converting SCSI to this model even
     show an 8x improvement on single queue devices. So while the model
     was intended to work on the newer multiqueue devices, it has
     substantial improvements for "classic" hardware as well.

     This code has gone through extensive testing and development, it's
     now ready to go. A pull request to convert virtio-blk to this
     model will be coming as well, with more drivers scheduled for 3.14
     conversion.

   - Two blktrace fixes from Jan and Chen Gang.

   - A plug merge fix from Alireza Haghdoost.

   - Conversion of __get_cpu_var() from Christoph Lameter.

   - Fix for sector_div() with a 64-bit divisor from Geert Uytterhoeven.

   - A fix for a race between request completion and the timeout
     handling from Jeff Moyer. This is what caused the merge conflict
     with blk-mq/core, in case you are looking at that.

   - A dm stacking fix from Mike Snitzer.

   - A code consolidation fix and duplicated code removal from Kent
     Overstreet.

   - A handful of block bug fixes from Mikulas Patocka, fixing a loop
     crash and memory corruption on blk cg.

   - Elevator switch bug fix from Tomoki Sekiyama.

  A heads-up that I had to rebase this branch. Initially the immutable
  bio_vecs had been queued up for inclusion, but a week later it became
  clear that it wasn't fully cooked yet. So the decision was made to
  pull this out and postpone it until 3.14. It was a straightforward
  rebase, just pruning out the immutable series and the later fixes of
  problems with it.
  The rest of the patches applied directly and no further changes were
  made"

* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
  block: Do not call sector_div() with a 64-bit divisor
  kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
  block: Consolidate duplicated bio_trim() implementations
  block: Use rw_copy_check_uvector()
  block: Enable sysfs nomerge control for I/O requests in the plug list
  block: properly stack underlying max_segment_size to DM device
  elevator: acquire q->sysfs_lock in elevator_change()
  elevator: Fix a race in elevator switching and md device initialization
  block: Replace __get_cpu_var uses
  bdi: test bdi_init failure
  block: fix a probe argument to blk_register_region
  loop: fix crash if blk_alloc_queue fails
  blk-core: Fix memory corruption if blkcg_init_queue fails
  block: fix race between request completion and timeout handling
  blktrace: Send BLK_TN_PROCESS events to all running traces
  blk-mq: don't disallow request merges for req->special being set
  blk-mq: mq plug list breakage
  blk-mq: fix for flush deadlock
  ...
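For readers new to the interface, the shape of a blk-mq driver in this release can be seen in the null_blk addition below: the driver fills in a struct blk_mq_ops with a ->queue_rq() handler, describes its queues in a struct blk_mq_reg (hardware queue count, depth, per-command data size, flags), and calls blk_mq_init_queue(). The following is a minimal sketch condensed from the null_blk code in this merge; the sketch_* names are placeholders, and error handling, gendisk setup and module boilerplate are omitted, so it is illustrative rather than a complete driver.

    #include <linux/blkdev.h>
    #include <linux/blk-mq.h>

    /* Per-request state; blk-mq allocates this alongside each request
     * because of .cmd_size below and exposes it via rq->special. */
    struct sketch_cmd {
    	struct request *rq;
    };

    /* ->queue_rq() is invoked once per request; a real driver would
     * start hardware I/O here instead of completing immediately. */
    static int sketch_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
    {
    	struct sketch_cmd *cmd = rq->special;

    	cmd->rq = rq;
    	blk_mq_end_io(rq, 0);	/* like null_blk's NULL_IRQ_NONE mode */
    	return BLK_MQ_RQ_QUEUE_OK;
    }

    static struct blk_mq_ops sketch_mq_ops = {
    	.queue_rq	= sketch_queue_rq,
    	.map_queue	= blk_mq_map_queue,	/* default CPU to hw-queue map */
    };

    static struct blk_mq_reg sketch_mq_reg = {
    	.ops		= &sketch_mq_ops,
    	.nr_hw_queues	= 1,
    	.queue_depth	= 64,
    	.cmd_size	= sizeof(struct sketch_cmd),
    	.flags		= BLK_MQ_F_SHOULD_MERGE,
    };

    /* The returned queue is attached to a gendisk and add_disk()'d
     * exactly as with the legacy request_fn path. */
    static struct request_queue *sketch_init_queue(void)
    {
    	return blk_mq_init_queue(&sketch_mq_reg, NULL);
    }

On top of this skeleton, null_blk layers its own tag management and the softirq/IPI and hrtimer completion modes selected by the irqmode parameter.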
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig        |   3
-rw-r--r--  drivers/block/Makefile       |   1
-rw-r--r--  drivers/block/brd.c          |   2
-rw-r--r--  drivers/block/floppy.c       |   4
-rw-r--r--  drivers/block/loop.c         |   6
-rw-r--r--  drivers/block/null_blk.c     | 635
-rw-r--r--  drivers/block/xen-blkfront.c |  53
7 files changed, 647 insertions(+), 57 deletions(-)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e67fa16e1938..5902bd006a9c 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -15,6 +15,9 @@ menuconfig BLK_DEV
 
 if BLK_DEV
 
+config BLK_DEV_NULL_BLK
+	tristate "Null test block driver"
+
 config BLK_DEV_FD
 	tristate "Normal floppy disk support"
 	depends on ARCH_MAY_HAVE_PC_FDC
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index ca07399a8d99..03b3b4a2bd8a 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o
 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/
 
 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
 
 nvme-y := nvme-core.o nvme-scsi.o
 swim_mod-y := swim.o swim_asm.o
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 9bf4371755f2..d91f1a56e861 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -545,7 +545,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 
 	mutex_lock(&brd_devices_mutex);
 	brd = brd_init_one(MINOR(dev) >> part_shift);
-	kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
+	kobj = brd ? get_disk(brd->brd_disk) : NULL;
 	mutex_unlock(&brd_devices_mutex);
 
 	*part = 0;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 04ceb7e2fadd..000abe2f105c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q)
 		return;
 
 	if (WARN(atomic_read(&usage_count) == 0,
-		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n",
 		 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
-		 current_req->cmd_flags))
+		 (unsigned long long) current_req->cmd_flags))
 		return;
 
 	if (test_and_set_bit(0, &fdc_busy)) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 40e715531aa6..dbdb88a4976c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1633,7 +1633,7 @@ static int loop_add(struct loop_device **l, int i)
 	err = -ENOMEM;
 	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!lo->lo_queue)
-		goto out_free_dev;
+		goto out_free_idr;
 
 	disk = lo->lo_disk = alloc_disk(1 << part_shift);
 	if (!disk)
@@ -1678,6 +1678,8 @@ static int loop_add(struct loop_device **l, int i)
 
 out_free_queue:
 	blk_cleanup_queue(lo->lo_queue);
+out_free_idr:
+	idr_remove(&loop_index_idr, i);
 out_free_dev:
 	kfree(lo);
 out:
@@ -1741,7 +1743,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
 	if (err < 0)
 		err = loop_add(&lo, MINOR(dev) >> part_shift);
 	if (err < 0)
-		kobj = ERR_PTR(err);
+		kobj = NULL;
 	else
 		kobj = get_disk(lo->lo_disk);
 	mutex_unlock(&loop_index_mutex);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
new file mode 100644
index 000000000000..b5d842370cc9
--- /dev/null
+++ b/drivers/block/null_blk.c
@@ -0,0 +1,635 @@
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/blk-mq.h>
+#include <linux/hrtimer.h>
+
+struct nullb_cmd {
+	struct list_head list;
+	struct llist_node ll_list;
+	struct call_single_data csd;
+	struct request *rq;
+	struct bio *bio;
+	unsigned int tag;
+	struct nullb_queue *nq;
+};
+
+struct nullb_queue {
+	unsigned long *tag_map;
+	wait_queue_head_t wait;
+	unsigned int queue_depth;
+
+	struct nullb_cmd *cmds;
+};
+
+struct nullb {
+	struct list_head list;
+	unsigned int index;
+	struct request_queue *q;
+	struct gendisk *disk;
+	struct hrtimer timer;
+	unsigned int queue_depth;
+	spinlock_t lock;
+
+	struct nullb_queue *queues;
+	unsigned int nr_queues;
+};
+
+static LIST_HEAD(nullb_list);
+static struct mutex lock;
+static int null_major;
+static int nullb_indexes;
+
+struct completion_queue {
+	struct llist_head list;
+	struct hrtimer timer;
+};
+
+/*
+ * These are per-cpu for now, they will need to be configured by the
+ * complete_queues parameter and appropriately mapped.
+ */
+static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+
+enum {
+	NULL_IRQ_NONE = 0,
+	NULL_IRQ_SOFTIRQ = 1,
+	NULL_IRQ_TIMER = 2,
+
+	NULL_Q_BIO = 0,
+	NULL_Q_RQ = 1,
+	NULL_Q_MQ = 2,
+};
+
+static int submit_queues = 1;
+module_param(submit_queues, int, S_IRUGO);
+MODULE_PARM_DESC(submit_queues, "Number of submission queues");
+
+static int home_node = NUMA_NO_NODE;
+module_param(home_node, int, S_IRUGO);
+MODULE_PARM_DESC(home_node, "Home node for the device");
+
+static int queue_mode = NULL_Q_MQ;
+module_param(queue_mode, int, S_IRUGO);
+MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");
+
+static int gb = 250;
+module_param(gb, int, S_IRUGO);
+MODULE_PARM_DESC(gb, "Size in GB");
+
+static int bs = 512;
+module_param(bs, int, S_IRUGO);
+MODULE_PARM_DESC(bs, "Block size (in bytes)");
+
+static int nr_devices = 2;
+module_param(nr_devices, int, S_IRUGO);
+MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+
+static int irqmode = NULL_IRQ_SOFTIRQ;
+module_param(irqmode, int, S_IRUGO);
+MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
+
+static int completion_nsec = 10000;
+module_param(completion_nsec, int, S_IRUGO);
+MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
+
+static int hw_queue_depth = 64;
+module_param(hw_queue_depth, int, S_IRUGO);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
+
+static bool use_per_node_hctx = true;
+module_param(use_per_node_hctx, bool, S_IRUGO);
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
+
+static void put_tag(struct nullb_queue *nq, unsigned int tag)
+{
+	clear_bit_unlock(tag, nq->tag_map);
+
+	if (waitqueue_active(&nq->wait))
+		wake_up(&nq->wait);
+}
+
+static unsigned int get_tag(struct nullb_queue *nq)
+{
+	unsigned int tag;
+
+	do {
+		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
+		if (tag >= nq->queue_depth)
+			return -1U;
+	} while (test_and_set_bit_lock(tag, nq->tag_map));
+
+	return tag;
+}
+
+static void free_cmd(struct nullb_cmd *cmd)
+{
+	put_tag(cmd->nq, cmd->tag);
+}
+
+static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
+{
+	struct nullb_cmd *cmd;
+	unsigned int tag;
+
+	tag = get_tag(nq);
+	if (tag != -1U) {
+		cmd = &nq->cmds[tag];
+		cmd->tag = tag;
+		cmd->nq = nq;
+		return cmd;
+	}
+
+	return NULL;
+}
+
+static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+{
+	struct nullb_cmd *cmd;
+	DEFINE_WAIT(wait);
+
+	cmd = __alloc_cmd(nq);
+	if (cmd || !can_wait)
+		return cmd;
+
+	do {
+		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+		cmd = __alloc_cmd(nq);
+		if (cmd)
+			break;
+
+		io_schedule();
+	} while (1);
+
+	finish_wait(&nq->wait, &wait);
+	return cmd;
+}
+
+static void end_cmd(struct nullb_cmd *cmd)
+{
+	if (cmd->rq) {
+		if (queue_mode == NULL_Q_MQ)
+			blk_mq_end_io(cmd->rq, 0);
+		else {
+			INIT_LIST_HEAD(&cmd->rq->queuelist);
+			blk_end_request_all(cmd->rq, 0);
+		}
+	} else if (cmd->bio)
+		bio_endio(cmd->bio, 0);
+
+	if (queue_mode != NULL_Q_MQ)
+		free_cmd(cmd);
+}
+
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry;
+	struct nullb_cmd *cmd;
+
+	cq = &per_cpu(completion_queues, smp_processor_id());
+
+	while ((entry = llist_del_all(&cq->list)) != NULL) {
+		do {
+			cmd = container_of(entry, struct nullb_cmd, ll_list);
+			end_cmd(cmd);
+			entry = entry->next;
+		} while (entry);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void null_cmd_end_timer(struct nullb_cmd *cmd)
+{
+	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
+
+	cmd->ll_list.next = NULL;
+	if (llist_add(&cmd->ll_list, &cq->list)) {
+		ktime_t kt = ktime_set(0, completion_nsec);
+
+		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+	}
+
+	put_cpu();
+}
+
+static void null_softirq_done_fn(struct request *rq)
+{
+	blk_end_request_all(rq, 0);
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+
+static void null_ipi_cmd_end_io(void *data)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry, *next;
+	struct nullb_cmd *cmd;
+
+	cq = &per_cpu(completion_queues, smp_processor_id());
+
+	entry = llist_del_all(&cq->list);
+
+	while (entry) {
+		next = entry->next;
+		cmd = llist_entry(entry, struct nullb_cmd, ll_list);
+		end_cmd(cmd);
+		entry = next;
+	}
+}
+
+static void null_cmd_end_ipi(struct nullb_cmd *cmd)
+{
+	struct call_single_data *data = &cmd->csd;
+	int cpu = get_cpu();
+	struct completion_queue *cq = &per_cpu(completion_queues, cpu);
+
+	cmd->ll_list.next = NULL;
+
+	if (llist_add(&cmd->ll_list, &cq->list)) {
+		data->func = null_ipi_cmd_end_io;
+		data->flags = 0;
+		__smp_call_function_single(cpu, data, 0);
+	}
+
+	put_cpu();
+}
+
+#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+
+static inline void null_handle_cmd(struct nullb_cmd *cmd)
+{
+	/* Complete IO by inline, softirq or timer */
+	switch (irqmode) {
+	case NULL_IRQ_NONE:
+		end_cmd(cmd);
+		break;
+	case NULL_IRQ_SOFTIRQ:
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+		null_cmd_end_ipi(cmd);
+#else
+		end_cmd(cmd);
+#endif
+		break;
+	case NULL_IRQ_TIMER:
+		null_cmd_end_timer(cmd);
+		break;
+	}
+}
+
+static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
+{
+	int index = 0;
+
+	if (nullb->nr_queues != 1)
+		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
+
+	return &nullb->queues[index];
+}
+
+static void null_queue_bio(struct request_queue *q, struct bio *bio)
+{
+	struct nullb *nullb = q->queuedata;
+	struct nullb_queue *nq = nullb_to_queue(nullb);
+	struct nullb_cmd *cmd;
+
+	cmd = alloc_cmd(nq, 1);
+	cmd->bio = bio;
+
+	null_handle_cmd(cmd);
+}
+
+static int null_rq_prep_fn(struct request_queue *q, struct request *req)
+{
+	struct nullb *nullb = q->queuedata;
+	struct nullb_queue *nq = nullb_to_queue(nullb);
+	struct nullb_cmd *cmd;
+
+	cmd = alloc_cmd(nq, 0);
+	if (cmd) {
+		cmd->rq = req;
+		req->special = cmd;
+		return BLKPREP_OK;
+	}
+
+	return BLKPREP_DEFER;
+}
+
+static void null_request_fn(struct request_queue *q)
+{
+	struct request *rq;
+
+	while ((rq = blk_fetch_request(q)) != NULL) {
+		struct nullb_cmd *cmd = rq->special;
+
+		spin_unlock_irq(q->queue_lock);
+		null_handle_cmd(cmd);
+		spin_lock_irq(q->queue_lock);
+	}
+}
+
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	struct nullb_cmd *cmd = rq->special;
+
+	cmd->rq = rq;
+	cmd->nq = hctx->driver_data;
+
+	null_handle_cmd(cmd);
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
+{
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
+				hctx_index);
+}
+
+static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
+{
+	kfree(hctx);
+}
+
+static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+			  unsigned int index)
+{
+	struct nullb *nullb = data;
+	struct nullb_queue *nq = &nullb->queues[index];
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+	nullb->nr_queues++;
+	hctx->driver_data = nq;
+
+	return 0;
+}
+
+static struct blk_mq_ops null_mq_ops = {
+	.queue_rq = null_queue_rq,
+	.map_queue = blk_mq_map_queue,
+	.init_hctx = null_init_hctx,
+};
+
+static struct blk_mq_reg null_mq_reg = {
+	.ops = &null_mq_ops,
+	.queue_depth = 64,
+	.cmd_size = sizeof(struct nullb_cmd),
+	.flags = BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void null_del_dev(struct nullb *nullb)
+{
+	list_del_init(&nullb->list);
+
+	del_gendisk(nullb->disk);
+	if (queue_mode == NULL_Q_MQ)
+		blk_mq_free_queue(nullb->q);
+	else
+		blk_cleanup_queue(nullb->q);
+	put_disk(nullb->disk);
+	kfree(nullb);
+}
+
+static int null_open(struct block_device *bdev, fmode_t mode)
+{
+	return 0;
+}
+
+static void null_release(struct gendisk *disk, fmode_t mode)
+{
+}
+
+static const struct block_device_operations null_fops = {
+	.owner = THIS_MODULE,
+	.open = null_open,
+	.release = null_release,
+};
+
+static int setup_commands(struct nullb_queue *nq)
+{
+	struct nullb_cmd *cmd;
+	int i, tag_size;
+
+	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
+	if (!nq->cmds)
+		return 1;
+
+	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
+	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
+	if (!nq->tag_map) {
+		kfree(nq->cmds);
+		return 1;
+	}
+
+	for (i = 0; i < nq->queue_depth; i++) {
+		cmd = &nq->cmds[i];
+		INIT_LIST_HEAD(&cmd->list);
+		cmd->ll_list.next = NULL;
+		cmd->tag = -1U;
+	}
+
+	return 0;
+}
+
+static void cleanup_queue(struct nullb_queue *nq)
+{
+	kfree(nq->tag_map);
+	kfree(nq->cmds);
+}
+
+static void cleanup_queues(struct nullb *nullb)
+{
+	int i;
+
+	for (i = 0; i < nullb->nr_queues; i++)
+		cleanup_queue(&nullb->queues[i]);
+
+	kfree(nullb->queues);
+}
+
+static int setup_queues(struct nullb *nullb)
+{
+	struct nullb_queue *nq;
+	int i;
+
+	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
+	if (!nullb->queues)
+		return 1;
+
+	nullb->nr_queues = 0;
+	nullb->queue_depth = hw_queue_depth;
+
+	if (queue_mode == NULL_Q_MQ)
+		return 0;
+
+	for (i = 0; i < submit_queues; i++) {
+		nq = &nullb->queues[i];
+		init_waitqueue_head(&nq->wait);
+		nq->queue_depth = hw_queue_depth;
+		if (setup_commands(nq))
+			break;
+		nullb->nr_queues++;
+	}
+
+	if (i == submit_queues)
+		return 0;
+
+	cleanup_queues(nullb);
+	return 1;
+}
+
+static int null_add_dev(void)
+{
+	struct gendisk *disk;
+	struct nullb *nullb;
+	sector_t size;
+
+	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
+	if (!nullb)
+		return -ENOMEM;
+
+	spin_lock_init(&nullb->lock);
+
+	if (setup_queues(nullb))
+		goto err;
+
+	if (queue_mode == NULL_Q_MQ) {
+		null_mq_reg.numa_node = home_node;
+		null_mq_reg.queue_depth = hw_queue_depth;
+
+		if (use_per_node_hctx) {
+			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
+			null_mq_reg.ops->free_hctx = null_free_hctx;
+
+			null_mq_reg.nr_hw_queues = nr_online_nodes;
+		} else {
+			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
+			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
+
+			null_mq_reg.nr_hw_queues = submit_queues;
+		}
+
+		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
+	} else if (queue_mode == NULL_Q_BIO) {
+		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+		blk_queue_make_request(nullb->q, null_queue_bio);
+	} else {
+		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
+		if (nullb->q)
+			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+	}
+
+	if (!nullb->q)
+		goto queue_fail;
+
+	nullb->q->queuedata = nullb;
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
+
+	disk = nullb->disk = alloc_disk_node(1, home_node);
+	if (!disk) {
+queue_fail:
+		if (queue_mode == NULL_Q_MQ)
+			blk_mq_free_queue(nullb->q);
+		else
+			blk_cleanup_queue(nullb->q);
+		cleanup_queues(nullb);
+err:
+		kfree(nullb);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&lock);
+	list_add_tail(&nullb->list, &nullb_list);
+	nullb->index = nullb_indexes++;
+	mutex_unlock(&lock);
+
+	blk_queue_logical_block_size(nullb->q, bs);
+	blk_queue_physical_block_size(nullb->q, bs);
+
+	size = gb * 1024 * 1024 * 1024ULL;
+	sector_div(size, bs);
+	set_capacity(disk, size);
+
+	disk->flags |= GENHD_FL_EXT_DEVT;
+	disk->major = null_major;
+	disk->first_minor = nullb->index;
+	disk->fops = &null_fops;
+	disk->private_data = nullb;
+	disk->queue = nullb->q;
+	sprintf(disk->disk_name, "nullb%d", nullb->index);
+	add_disk(disk);
+	return 0;
+}
+
+static int __init null_init(void)
+{
+	unsigned int i;
+
+#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+	if (irqmode == NULL_IRQ_SOFTIRQ) {
+		pr_warn("null_blk: softirq completions not available.\n");
+		pr_warn("null_blk: using direct completions.\n");
+		irqmode = NULL_IRQ_NONE;
+	}
+#endif
+
+	if (submit_queues > nr_cpu_ids)
+		submit_queues = nr_cpu_ids;
+	else if (!submit_queues)
+		submit_queues = 1;
+
+	mutex_init(&lock);
+
+	/* Initialize a separate list for each CPU for issuing softirqs */
+	for_each_possible_cpu(i) {
+		struct completion_queue *cq = &per_cpu(completion_queues, i);
+
+		init_llist_head(&cq->list);
+
+		if (irqmode != NULL_IRQ_TIMER)
+			continue;
+
+		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		cq->timer.function = null_cmd_timer_expired;
+	}
+
+	null_major = register_blkdev(0, "nullb");
+	if (null_major < 0)
+		return null_major;
+
+	for (i = 0; i < nr_devices; i++) {
+		if (null_add_dev()) {
+			unregister_blkdev(null_major, "nullb");
+			return -EINVAL;
+		}
+	}
+
+	pr_info("null: module loaded\n");
+	return 0;
+}
+
+static void __exit null_exit(void)
+{
+	struct nullb *nullb;
+
+	unregister_blkdev(null_major, "nullb");
+
+	mutex_lock(&lock);
+	while (!list_empty(&nullb_list)) {
+		nullb = list_entry(nullb_list.next, struct nullb, list);
+		null_del_dev(nullb);
+	}
+	mutex_unlock(&lock);
+}
+
+module_init(null_init);
+module_exit(null_exit);
+
+MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bbee8a6..8d53ed293606 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1336,57 +1336,6 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-/*
- * This is a clone of md_trim_bio, used to split a bio into smaller ones
- */
-static void trim_bio(struct bio *bio, int offset, int size)
-{
-	/* 'bio' is a cloned bio which we need to trim to match
-	 * the given offset and size.
-	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
-	 */
-	int i;
-	struct bio_vec *bvec;
-	int sofar = 0;
-
-	size <<= 9;
-	if (offset == 0 && size == bio->bi_size)
-		return;
-
-	bio->bi_sector += offset;
-	bio->bi_size = size;
-	offset <<= 9;
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
-	while (bio->bi_idx < bio->bi_vcnt &&
-	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-		/* remove this whole bio_vec */
-		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-		bio->bi_idx++;
-	}
-	if (bio->bi_idx < bio->bi_vcnt) {
-		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-	}
-	/* avoid any complications with bi_idx being non-zero*/
-	if (bio->bi_idx) {
-		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
-			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
-		bio->bi_vcnt -= bio->bi_idx;
-		bio->bi_idx = 0;
-	}
-	/* Make sure vcnt and last bv are not too big */
-	bio_for_each_segment(bvec, bio, i) {
-		if (sofar + bvec->bv_len > size)
-			bvec->bv_len = size - sofar;
-		if (bvec->bv_len == 0) {
-			bio->bi_vcnt = i;
-			break;
-		}
-		sofar += bvec->bv_len;
-	}
-}
-
 static void split_bio_end(struct bio *bio, int error)
 {
 	struct split_bio *split_bio = bio->bi_private;
@@ -1522,7 +1471,7 @@ static int blkif_recover(struct blkfront_info *info)
 					   (unsigned int)(bio->bi_size >> 9) - offset);
 			cloned_bio = bio_clone(bio, GFP_NOIO);
 			BUG_ON(cloned_bio == NULL);
-			trim_bio(cloned_bio, offset, size);
+			bio_trim(cloned_bio, offset, size);
 			cloned_bio->bi_private = split_bio;
 			cloned_bio->bi_end_io = split_bio_end;
 			submit_bio(cloned_bio->bi_rw, cloned_bio);