aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/osdblk.c
diff options
context:
space:
mode:
authorJeff Garzik <jeff@garzik.org>2009-04-10 07:50:45 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2009-06-24 05:25:02 -0400
commit2a13877c5ef3207a2a5c56250742e60808677f90 (patch)
treed6f03fb07ea97770a15e69de348793583a24cf19 /drivers/block/osdblk.c
parent42c55aa838bbd274a7ad2be1fd81d423ca63da4e (diff)
osdblk: a Linux block device for OSD objects
Submitted driver exports a block device of the form /dev/osdblkX, where X is a decimal number. It does that by mounting a stacking block device on top of an osd object. For example, if you create a 2G object on an OSD device, you can then use this module to present that 2G object as a Linux block device. See inside patch for exact documentation. [Sitting at linux-next helped fix proper Kconfig dependency for this driver, thanks to Randy Dunlap] Signed-off-by: Jeff Garzik <jgarzik@redhat.com> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'drivers/block/osdblk.c')
-rw-r--r--drivers/block/osdblk.c694
1 files changed, 694 insertions, 0 deletions
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
new file mode 100644
index 000000000000..3565d0dd123f
--- /dev/null
+++ b/drivers/block/osdblk.c
@@ -0,0 +1,694 @@
1
2/*
3 osdblk.c -- Export a single SCSI OSD object as a Linux block device
4
5
6 Copyright 2009 Red Hat, Inc.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; see the file COPYING. If not, write to
19 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
20
21
22 Instructions for use
23 --------------------
24
25 1) Map a Linux block device to an existing OSD object.
26
27 In this example, we will use partition id 1234, object id 5678,
28 OSD device /dev/osd1.
29
30 $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add
31
32
33 2) List all active blkdev<->object mappings.
34
35 In this example, we have performed step #1 twice, creating two blkdevs,
36 mapped to two separate OSD objects.
37
38 $ cat /sys/class/osdblk/list
39 0 174 1234 5678 /dev/osd1
40 1 179 1994 897123 /dev/osd0
41
42 The columns, in order, are:
43 - blkdev unique id
44 - blkdev assigned major
45 - OSD object partition id
46 - OSD object id
47 - OSD device
48
49
50 3) Remove an active blkdev<->object mapping.
51
52 In this example, we remove the mapping with blkdev unique id 1.
53
54 $ echo 1 > /sys/class/osdblk/remove
55
56
57 NOTE: The actual creation and deletion of OSD objects is outside the scope
58 of this driver.
59
60 */
61
62#include <linux/kernel.h>
63#include <linux/device.h>
64#include <linux/module.h>
65#include <linux/fs.h>
66#include <scsi/osd_initiator.h>
67#include <scsi/osd_attributes.h>
68#include <scsi/osd_sec.h>
69
70#define DRV_NAME "osdblk"
71#define PFX DRV_NAME ": "
72
73/* #define _OSDBLK_DEBUG */
74#ifdef _OSDBLK_DEBUG
75#define OSDBLK_DEBUG(fmt, a...) \
76 printk(KERN_NOTICE "osdblk @%s:%d: " fmt, __func__, __LINE__, ##a)
77#else
78#define OSDBLK_DEBUG(fmt, a...) \
79 do { if (0) printk(fmt, ##a); } while (0)
80#endif
81
82MODULE_AUTHOR("Jeff Garzik <jeff@garzik.org>");
83MODULE_DESCRIPTION("block device inside an OSD object osdblk.ko");
84MODULE_LICENSE("GPL");
85
struct osdblk_device;

/* Driver-wide limits. */
enum {
	/* number of minors requested from alloc_disk() for each blkdev */
	OSDBLK_MINORS_PER_MAJOR	= 256,

	/* tag depth of the queue, and size of the per-device request table */
	OSDBLK_MAX_REQ		= 32,

	/* timeout value handed to synchronous OSD requests */
	OSDBLK_OP_TIMEOUT	= 4 * 60,
};
93
/*
 * Per-request bookkeeping.  One slot exists for every queue tag; the slot
 * is filled in by the request function and handed to the OSD completion
 * callback as its private pointer.
 */
struct osdblk_request {
	struct request		*rq;	/* request received from the blk layer */
	struct bio		*bio;	/* clone of rq's bio chain (NULL for flush) */
	struct osdblk_device	*osdev;	/* blkdev this request belongs to */
};
99
100struct osdblk_device {
101 int id; /* blkdev unique id */
102
103 int major; /* blkdev assigned major */
104 struct gendisk *disk; /* blkdev's gendisk and rq */
105 struct request_queue *q;
106
107 struct osd_dev *osd; /* associated OSD */
108
109 char name[32]; /* blkdev name, e.g. osdblk34 */
110
111 spinlock_t lock; /* queue lock */
112
113 struct osd_obj_id obj; /* OSD partition, obj id */
114 uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */
115
116 struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */
117
118 struct list_head node;
119
120 char osd_path[0]; /* OSD device path */
121};
122
123static struct class *class_osdblk; /* /sys/class/osdblk */
124static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
125static LIST_HEAD(osdblkdev_list);
126
127static struct block_device_operations osdblk_bd_ops = {
128 .owner = THIS_MODULE,
129};
130
131static const struct osd_attr g_attr_logical_length = ATTR_DEF(
132 OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
133
134static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
135 const struct osd_obj_id *obj)
136{
137 osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
138}
139
140/* copied from exofs; move to libosd? */
141/*
142 * Perform a synchronous OSD operation. copied from exofs; move to libosd?
143 */
144static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential)
145{
146 int ret;
147
148 or->timeout = timeout;
149 ret = osd_finalize_request(or, 0, credential, NULL);
150 if (ret)
151 return ret;
152
153 ret = osd_execute_request(or);
154
155 /* osd_req_decode_sense(or, ret); */
156 return ret;
157}
158
159/*
160 * Perform an asynchronous OSD operation. copied from exofs; move to libosd?
161 */
162static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done,
163 void *caller_context, u8 *cred)
164{
165 int ret;
166
167 ret = osd_finalize_request(or, 0, cred, NULL);
168 if (ret)
169 return ret;
170
171 ret = osd_execute_request_async(or, async_done, caller_context);
172
173 return ret;
174}
175
176/* copied from exofs; move to libosd? */
177static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
178{
179 struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
180 void *iter = NULL;
181 int nelem;
182
183 do {
184 nelem = 1;
185 osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter);
186 if ((cur_attr.attr_page == attr->attr_page) &&
187 (cur_attr.attr_id == attr->attr_id)) {
188 attr->len = cur_attr.len;
189 attr->val_ptr = cur_attr.val_ptr;
190 return 0;
191 }
192 } while (iter);
193
194 return -EIO;
195}
196
197static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out)
198{
199 struct osd_request *or;
200 struct osd_attr attr;
201 int ret;
202
203 /* start request */
204 or = osd_start_request(osdev->osd, GFP_KERNEL);
205 if (!or)
206 return -ENOMEM;
207
208 /* create a get-attributes(length) request */
209 osd_req_get_attributes(or, &osdev->obj);
210
211 osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
212
213 /* execute op synchronously */
214 ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred);
215 if (ret)
216 goto out;
217
218 /* extract length from returned attribute info */
219 attr = g_attr_logical_length;
220 ret = extract_attr_from_req(or, &attr);
221 if (ret)
222 goto out;
223
224 *size_out = get_unaligned_be64(attr.val_ptr);
225
226out:
227 osd_end_request(or);
228 return ret;
229
230}
231
232static void osdblk_osd_complete(struct osd_request *or, void *private)
233{
234 struct osdblk_request *orq = private;
235 struct osd_sense_info osi;
236 int ret = osd_req_decode_sense(or, &osi);
237
238 if (ret) {
239 ret = -EIO;
240 OSDBLK_DEBUG("osdblk_osd_complete with err=%d\n", ret);
241 }
242
243 /* complete OSD request */
244 osd_end_request(or);
245
246 /* complete request passed to osdblk by block layer */
247 __blk_end_request_all(orq->rq, ret);
248}
249
250static void bio_chain_put(struct bio *chain)
251{
252 struct bio *tmp;
253
254 while (chain) {
255 tmp = chain;
256 chain = chain->bi_next;
257
258 bio_put(tmp);
259 }
260}
261
262static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
263{
264 struct bio *tmp, *new_chain = NULL, *tail = NULL;
265
266 while (old_chain) {
267 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs);
268 if (!tmp)
269 goto err_out;
270
271 __bio_clone(tmp, old_chain);
272 tmp->bi_bdev = NULL;
273 gfpmask &= ~__GFP_WAIT;
274 tmp->bi_next = NULL;
275
276 if (!new_chain)
277 new_chain = tail = tmp;
278 else {
279 tail->bi_next = tmp;
280 tail = tmp;
281 }
282
283 old_chain = old_chain->bi_next;
284 }
285
286 return new_chain;
287
288err_out:
289 OSDBLK_DEBUG("bio_chain_clone with err\n");
290 bio_chain_put(new_chain);
291 return NULL;
292}
293
294static void osdblk_rq_fn(struct request_queue *q)
295{
296 struct osdblk_device *osdev = q->queuedata;
297
298 while (1) {
299 struct request *rq;
300 struct osdblk_request *orq;
301 struct osd_request *or;
302 struct bio *bio;
303 bool do_write, do_flush;
304
305 /* peek at request from block layer */
306 rq = blk_fetch_request(q);
307 if (!rq)
308 break;
309
310 /* filter out block requests we don't understand */
311 if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) {
312 blk_end_request_all(rq, 0);
313 continue;
314 }
315
316 /* deduce our operation (read, write, flush) */
317 /* I wish the block layer simplified cmd_type/cmd_flags/cmd[]
318 * into a clearly defined set of RPC commands:
319 * read, write, flush, scsi command, power mgmt req,
320 * driver-specific, etc.
321 */
322
323 do_flush = (rq->special == (void *) 0xdeadbeefUL);
324 do_write = (rq_data_dir(rq) == WRITE);
325
326 if (!do_flush) { /* osd_flush does not use a bio */
327 /* a bio clone to be passed down to OSD request */
328 bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
329 if (!bio)
330 break;
331 } else
332 bio = NULL;
333
334 /* alloc internal OSD request, for OSD command execution */
335 or = osd_start_request(osdev->osd, GFP_ATOMIC);
336 if (!or) {
337 bio_chain_put(bio);
338 OSDBLK_DEBUG("osd_start_request with err\n");
339 break;
340 }
341
342 orq = &osdev->req[rq->tag];
343 orq->rq = rq;
344 orq->bio = bio;
345 orq->osdev = osdev;
346
347 /* init OSD command: flush, write or read */
348 if (do_flush)
349 osd_req_flush_object(or, &osdev->obj,
350 OSD_CDB_FLUSH_ALL, 0, 0);
351 else if (do_write)
352 osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
353 bio, blk_rq_bytes(rq));
354 else
355 osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
356 bio, blk_rq_bytes(rq));
357
358 OSDBLK_DEBUG("%s 0x%x bytes at 0x%llx\n",
359 do_flush ? "flush" : do_write ?
360 "write" : "read", blk_rq_bytes(rq),
361 blk_rq_pos(rq) * 512ULL);
362
363 /* begin OSD command execution */
364 if (osd_async_op(or, osdblk_osd_complete, orq,
365 osdev->obj_cred)) {
366 osd_end_request(or);
367 blk_requeue_request(q, rq);
368 bio_chain_put(bio);
369 OSDBLK_DEBUG("osd_execute_request_async with err\n");
370 break;
371 }
372
373 /* remove the special 'flush' marker, now that the command
374 * is executing
375 */
376 rq->special = NULL;
377 }
378}
379
380static void osdblk_prepare_flush(struct request_queue *q, struct request *rq)
381{
382 /* add driver-specific marker, to indicate that this request
383 * is a flush command
384 */
385 rq->special = (void *) 0xdeadbeefUL;
386}
387
388static void osdblk_free_disk(struct osdblk_device *osdev)
389{
390 struct gendisk *disk = osdev->disk;
391
392 if (!disk)
393 return;
394
395 if (disk->flags & GENHD_FL_UP)
396 del_gendisk(disk);
397 if (disk->queue)
398 blk_cleanup_queue(disk->queue);
399 put_disk(disk);
400}
401
402static int osdblk_init_disk(struct osdblk_device *osdev)
403{
404 struct gendisk *disk;
405 struct request_queue *q;
406 int rc;
407 u64 obj_size = 0;
408
409 /* contact OSD, request size info about the object being mapped */
410 rc = osdblk_get_obj_size(osdev, &obj_size);
411 if (rc)
412 return rc;
413
414 /* create gendisk info */
415 disk = alloc_disk(OSDBLK_MINORS_PER_MAJOR);
416 if (!disk)
417 return -ENOMEM;
418
419 sprintf(disk->disk_name, DRV_NAME "%d", osdev->id);
420 disk->major = osdev->major;
421 disk->first_minor = 0;
422 disk->fops = &osdblk_bd_ops;
423 disk->private_data = osdev;
424
425 /* init rq */
426 q = blk_init_queue(osdblk_rq_fn, &osdev->lock);
427 if (!q) {
428 put_disk(disk);
429 return -ENOMEM;
430 }
431
432 /* switch queue to TCQ mode; allocate tag map */
433 rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL);
434 if (rc) {
435 blk_cleanup_queue(q);
436 put_disk(disk);
437 return rc;
438 }
439
440 blk_queue_prep_rq(q, blk_queue_start_tag);
441 blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush);
442
443 disk->queue = q;
444
445 q->queuedata = osdev;
446
447 osdev->disk = disk;
448 osdev->q = q;
449
450 /* finally, announce the disk to the world */
451 set_capacity(disk, obj_size / 512ULL);
452 add_disk(disk);
453
454 printk(KERN_INFO "%s: Added of size 0x%llx\n",
455 disk->disk_name, (unsigned long long)obj_size);
456
457 return 0;
458}
459
460/********************************************************************
461 * /sys/class/osdblk/
462 * add map OSD object to blkdev
463 * remove unmap OSD object
464 * list show mappings
465 *******************************************************************/
466
/*
 * Release callback of the osdblk class: frees the class structure that
 * osdblk_sysfs_init() allocated.
 */
static void class_osdblk_release(struct class *cls)
{
	kfree(cls);
}
471
472static ssize_t class_osdblk_list(struct class *c, char *data)
473{
474 int n = 0;
475 struct list_head *tmp;
476
477 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
478
479 list_for_each(tmp, &osdblkdev_list) {
480 struct osdblk_device *osdev;
481
482 osdev = list_entry(tmp, struct osdblk_device, node);
483
484 n += sprintf(data+n, "%d %d %llu %llu %s\n",
485 osdev->id,
486 osdev->major,
487 osdev->obj.partition,
488 osdev->obj.id,
489 osdev->osd_path);
490 }
491
492 mutex_unlock(&ctl_mutex);
493 return n;
494}
495
496static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count)
497{
498 struct osdblk_device *osdev;
499 ssize_t rc;
500 int irc, new_id = 0;
501 struct list_head *tmp;
502
503 if (!try_module_get(THIS_MODULE))
504 return -ENODEV;
505
506 /* new osdblk_device object */
507 osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL);
508 if (!osdev) {
509 rc = -ENOMEM;
510 goto err_out_mod;
511 }
512
513 /* static osdblk_device initialization */
514 spin_lock_init(&osdev->lock);
515 INIT_LIST_HEAD(&osdev->node);
516
517 /* generate unique id: find highest unique id, add one */
518
519 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
520
521 list_for_each(tmp, &osdblkdev_list) {
522 struct osdblk_device *osdev;
523
524 osdev = list_entry(tmp, struct osdblk_device, node);
525 if (osdev->id > new_id)
526 new_id = osdev->id + 1;
527 }
528
529 osdev->id = new_id;
530
531 /* add to global list */
532 list_add_tail(&osdev->node, &osdblkdev_list);
533
534 mutex_unlock(&ctl_mutex);
535
536 /* parse add command */
537 if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id,
538 osdev->osd_path) != 3) {
539 rc = -EINVAL;
540 goto err_out_slot;
541 }
542
543 /* initialize rest of new object */
544 sprintf(osdev->name, DRV_NAME "%d", osdev->id);
545
546 /* contact requested OSD */
547 osdev->osd = osduld_path_lookup(osdev->osd_path);
548 if (IS_ERR(osdev->osd)) {
549 rc = PTR_ERR(osdev->osd);
550 goto err_out_slot;
551 }
552
553 /* build OSD credential */
554 osdblk_make_credential(osdev->obj_cred, &osdev->obj);
555
556 /* register our block device */
557 irc = register_blkdev(0, osdev->name);
558 if (irc < 0) {
559 rc = irc;
560 goto err_out_osd;
561 }
562
563 osdev->major = irc;
564
565 /* set up and announce blkdev mapping */
566 rc = osdblk_init_disk(osdev);
567 if (rc)
568 goto err_out_blkdev;
569
570 return count;
571
572err_out_blkdev:
573 unregister_blkdev(osdev->major, osdev->name);
574err_out_osd:
575 osduld_put_device(osdev->osd);
576err_out_slot:
577 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
578 list_del_init(&osdev->node);
579 mutex_unlock(&ctl_mutex);
580
581 kfree(osdev);
582err_out_mod:
583 OSDBLK_DEBUG("Error adding device %s\n", buf);
584 module_put(THIS_MODULE);
585 return rc;
586}
587
588static ssize_t class_osdblk_remove(struct class *c, const char *buf,
589 size_t count)
590{
591 struct osdblk_device *osdev = NULL;
592 int target_id, rc;
593 unsigned long ul;
594 struct list_head *tmp;
595
596 rc = strict_strtoul(buf, 10, &ul);
597 if (rc)
598 return rc;
599
600 /* convert to int; abort if we lost anything in the conversion */
601 target_id = (int) ul;
602 if (target_id != ul)
603 return -EINVAL;
604
605 /* remove object from list immediately */
606 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
607
608 list_for_each(tmp, &osdblkdev_list) {
609 osdev = list_entry(tmp, struct osdblk_device, node);
610 if (osdev->id == target_id) {
611 list_del_init(&osdev->node);
612 break;
613 }
614 osdev = NULL;
615 }
616
617 mutex_unlock(&ctl_mutex);
618
619 if (!osdev)
620 return -ENOENT;
621
622 /* clean up and free blkdev and associated OSD connection */
623 osdblk_free_disk(osdev);
624 unregister_blkdev(osdev->major, osdev->name);
625 osduld_put_device(osdev->osd);
626 kfree(osdev);
627
628 /* release module ref */
629 module_put(THIS_MODULE);
630
631 return count;
632}
633
634static struct class_attribute class_osdblk_attrs[] = {
635 __ATTR(add, 0200, NULL, class_osdblk_add),
636 __ATTR(remove, 0200, NULL, class_osdblk_remove),
637 __ATTR(list, 0444, class_osdblk_list, NULL),
638 __ATTR_NULL
639};
640
641static int osdblk_sysfs_init(void)
642{
643 int ret = 0;
644
645 /*
646 * create control files in sysfs
647 * /sys/class/osdblk/...
648 */
649 class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL);
650 if (!class_osdblk)
651 return -ENOMEM;
652
653 class_osdblk->name = DRV_NAME;
654 class_osdblk->owner = THIS_MODULE;
655 class_osdblk->class_release = class_osdblk_release;
656 class_osdblk->class_attrs = class_osdblk_attrs;
657
658 ret = class_register(class_osdblk);
659 if (ret) {
660 kfree(class_osdblk);
661 class_osdblk = NULL;
662 printk(PFX "failed to create class osdblk\n");
663 return ret;
664 }
665
666 return 0;
667}
668
669static void osdblk_sysfs_cleanup(void)
670{
671 if (class_osdblk)
672 class_destroy(class_osdblk);
673 class_osdblk = NULL;
674}
675
676static int __init osdblk_init(void)
677{
678 int rc;
679
680 rc = osdblk_sysfs_init();
681 if (rc)
682 return rc;
683
684 return 0;
685}
686
687static void __exit osdblk_exit(void)
688{
689 osdblk_sysfs_cleanup();
690}
691
692module_init(osdblk_init);
693module_exit(osdblk_exit);
694