aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2010-12-27 01:37:05 -0500
committerDavid S. Miller <davem@davemloft.net>2010-12-27 01:37:05 -0500
commit17f7f4d9fcce8f1b75b5f735569309dee7665968 (patch)
tree14d7e49ca0053a0fcab3c33b5023bf3f90c5c08a /drivers/block
parent041110a439e21cd40709ead4ffbfa8034619ad77 (diff)
parentd7c1255a3a21e98bdc64df8ccf005a174d7e6289 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts: net/ipv4/fib_frontend.c
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/amiflop.c2
-rw-r--r--drivers/block/ataflop.c2
-rw-r--r--drivers/block/cciss.c5
-rw-r--r--drivers/block/cciss_scsi.c8
-rw-r--r--drivers/block/drbd/drbd_receiver.c15
-rw-r--r--drivers/block/drbd/drbd_req.h3
-rw-r--r--drivers/block/drbd/drbd_worker.c11
-rw-r--r--drivers/block/rbd.c748
-rw-r--r--drivers/block/xen-blkfront.c57
9 files changed, 534 insertions, 317 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a1725e6488d3..7888501ad9ee 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1341,7 +1341,7 @@ static struct request *set_next_request(void)
1341{ 1341{
1342 struct request_queue *q; 1342 struct request_queue *q;
1343 int cnt = FD_MAX_UNITS; 1343 int cnt = FD_MAX_UNITS;
1344 struct request *rq; 1344 struct request *rq = NULL;
1345 1345
1346 /* Find next queue we can dispatch from */ 1346 /* Find next queue we can dispatch from */
1347 fdc_queue = fdc_queue + 1; 1347 fdc_queue = fdc_queue + 1;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4e4cc6c828cb..605a67e40bbf 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1399,7 +1399,7 @@ static struct request *set_next_request(void)
1399{ 1399{
1400 struct request_queue *q; 1400 struct request_queue *q;
1401 int old_pos = fdc_queue; 1401 int old_pos = fdc_queue;
1402 struct request *rq; 1402 struct request *rq = NULL;
1403 1403
1404 do { 1404 do {
1405 q = unit[fdc_queue].disk->queue; 1405 q = unit[fdc_queue].disk->queue;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a67d0a611a8a..8e0f9256eb58 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26");
66MODULE_LICENSE("GPL"); 66MODULE_LICENSE("GPL");
67 67
68static DEFINE_MUTEX(cciss_mutex); 68static DEFINE_MUTEX(cciss_mutex);
69static struct proc_dir_entry *proc_cciss;
69 70
70#include "cciss_cmd.h" 71#include "cciss_cmd.h"
71#include "cciss.h" 72#include "cciss.h"
@@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
363#define ENG_GIG_FACTOR (ENG_GIG/512) 364#define ENG_GIG_FACTOR (ENG_GIG/512)
364#define ENGAGE_SCSI "engage scsi" 365#define ENGAGE_SCSI "engage scsi"
365 366
366static struct proc_dir_entry *proc_cciss;
367
368static void cciss_seq_show_header(struct seq_file *seq) 367static void cciss_seq_show_header(struct seq_file *seq)
369{ 368{
370 ctlr_info_t *h = seq->private; 369 ctlr_info_t *h = seq->private;
@@ -2835,6 +2834,8 @@ static int cciss_revalidate(struct gendisk *disk)
2835 InquiryData_struct *inq_buff = NULL; 2834 InquiryData_struct *inq_buff = NULL;
2836 2835
2837 for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) { 2836 for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2837 if (!h->drv[logvol])
2838 continue;
2838 if (memcmp(h->drv[logvol]->LunID, drv->LunID, 2839 if (memcmp(h->drv[logvol]->LunID, drv->LunID,
2839 sizeof(drv->LunID)) == 0) { 2840 sizeof(drv->LunID)) == 0) {
2840 FOUND = 1; 2841 FOUND = 1;
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 575495f3c4b8..727d0225b7d0 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -62,8 +62,8 @@ static int cciss_scsi_proc_info(
62 int length, /* length of data in buffer */ 62 int length, /* length of data in buffer */
63 int func); /* 0 == read, 1 == write */ 63 int func); /* 0 == read, 1 == write */
64 64
65static int cciss_scsi_queue_command (struct scsi_cmnd *cmd, 65static int cciss_scsi_queue_command (struct Scsi_Host *h,
66 void (* done)(struct scsi_cmnd *)); 66 struct scsi_cmnd *cmd);
67static int cciss_eh_device_reset_handler(struct scsi_cmnd *); 67static int cciss_eh_device_reset_handler(struct scsi_cmnd *);
68static int cciss_eh_abort_handler(struct scsi_cmnd *); 68static int cciss_eh_abort_handler(struct scsi_cmnd *);
69 69
@@ -1406,7 +1406,7 @@ static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
1406 1406
1407 1407
1408static int 1408static int
1409cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *)) 1409cciss_scsi_queue_command_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
1410{ 1410{
1411 ctlr_info_t *h; 1411 ctlr_info_t *h;
1412 int rc; 1412 int rc;
@@ -1504,6 +1504,8 @@ cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd
1504 return 0; 1504 return 0;
1505} 1505}
1506 1506
1507static DEF_SCSI_QCMD(cciss_scsi_queue_command)
1508
1507static void cciss_unregister_scsi(ctlr_info_t *h) 1509static void cciss_unregister_scsi(ctlr_info_t *h)
1508{ 1510{
1509 struct cciss_scsi_adapter_data_t *sa; 1511 struct cciss_scsi_adapter_data_t *sa;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d299fe9e78c8..24487d4fb202 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -36,7 +36,6 @@
36#include <linux/memcontrol.h> 36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h> 37#include <linux/mm_inline.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/smp_lock.h>
40#include <linux/pkt_sched.h> 39#include <linux/pkt_sched.h>
41#define __KERNEL_SYSCALLS__ 40#define __KERNEL_SYSCALLS__
42#include <linux/unistd.h> 41#include <linux/unistd.h>
@@ -3628,17 +3627,19 @@ static void drbdd(struct drbd_conf *mdev)
3628 } 3627 }
3629 3628
3630 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); 3629 shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header);
3631 rv = drbd_recv(mdev, &header->h80.payload, shs);
3632 if (unlikely(rv != shs)) {
3633 dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
3634 goto err_out;
3635 }
3636
3637 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { 3630 if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) {
3638 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); 3631 dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size);
3639 goto err_out; 3632 goto err_out;
3640 } 3633 }
3641 3634
3635 if (shs) {
3636 rv = drbd_recv(mdev, &header->h80.payload, shs);
3637 if (unlikely(rv != shs)) {
3638 dev_err(DEV, "short read while reading sub header: rv=%d\n", rv);
3639 goto err_out;
3640 }
3641 }
3642
3642 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); 3643 rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs);
3643 3644
3644 if (unlikely(!rv)) { 3645 if (unlikely(!rv)) {
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 181ea0364822..ab2bd09d54b4 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -339,7 +339,8 @@ static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
339} 339}
340 340
341/* completion of master bio is outside of spinlock. 341/* completion of master bio is outside of spinlock.
342 * If you need it irqsave, do it your self! */ 342 * If you need it irqsave, do it your self!
343 * Which means: don't use from bio endio callback. */
343static inline int req_mod(struct drbd_request *req, 344static inline int req_mod(struct drbd_request *req,
344 enum drbd_req_event what) 345 enum drbd_req_event what)
345{ 346{
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b0551ba7ad0c..34f224b018b3 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -26,7 +26,6 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/drbd.h> 27#include <linux/drbd.h>
28#include <linux/sched.h> 28#include <linux/sched.h>
29#include <linux/smp_lock.h>
30#include <linux/wait.h> 29#include <linux/wait.h>
31#include <linux/mm.h> 30#include <linux/mm.h>
32#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
@@ -194,8 +193,10 @@ void drbd_endio_sec(struct bio *bio, int error)
194 */ 193 */
195void drbd_endio_pri(struct bio *bio, int error) 194void drbd_endio_pri(struct bio *bio, int error)
196{ 195{
196 unsigned long flags;
197 struct drbd_request *req = bio->bi_private; 197 struct drbd_request *req = bio->bi_private;
198 struct drbd_conf *mdev = req->mdev; 198 struct drbd_conf *mdev = req->mdev;
199 struct bio_and_error m;
199 enum drbd_req_event what; 200 enum drbd_req_event what;
200 int uptodate = bio_flagged(bio, BIO_UPTODATE); 201 int uptodate = bio_flagged(bio, BIO_UPTODATE);
201 202
@@ -221,7 +222,13 @@ void drbd_endio_pri(struct bio *bio, int error)
221 bio_put(req->private_bio); 222 bio_put(req->private_bio);
222 req->private_bio = ERR_PTR(error); 223 req->private_bio = ERR_PTR(error);
223 224
224 req_mod(req, what); 225 /* not req_mod(), we need irqsave here! */
226 spin_lock_irqsave(&mdev->req_lock, flags);
227 __req_mod(req, what, &m);
228 spin_unlock_irqrestore(&mdev->req_lock, flags);
229
230 if (m.bio)
231 complete_master_bio(mdev, &m);
225} 232}
226 233
227int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) 234int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 6ec9d53806c5..008d4a00b50d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -21,80 +21,9 @@
21 21
22 22
23 23
24 Instructions for use 24 For usage instructions, please refer to:
25 --------------------
26 25
27 1) Map a Linux block device to an existing rbd image. 26 Documentation/ABI/testing/sysfs-bus-rbd
28
29 Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
30
31 $ echo "192.168.0.1 name=admin rbd foo" > /sys/class/rbd/add
32
33 The snapshot name can be "-" or omitted to map the image read/write.
34
35 2) List all active blkdev<->object mappings.
36
37 In this example, we have performed step #1 twice, creating two blkdevs,
38 mapped to two separate rados objects in the rados rbd pool
39
40 $ cat /sys/class/rbd/list
41 #id major client_name pool name snap KB
42 0 254 client4143 rbd foo - 1024000
43
44 The columns, in order, are:
45 - blkdev unique id
46 - blkdev assigned major
47 - rados client id
48 - rados pool name
49 - rados block device name
50 - mapped snapshot ("-" if none)
51 - device size in KB
52
53
54 3) Create a snapshot.
55
56 Usage: <blkdev id> <snapname>
57
58 $ echo "0 mysnap" > /sys/class/rbd/snap_create
59
60
61 4) Listing a snapshot.
62
63 $ cat /sys/class/rbd/snaps_list
64 #id snap KB
65 0 - 1024000 (*)
66 0 foo 1024000
67
68 The columns, in order, are:
69 - blkdev unique id
70 - snapshot name, '-' means none (active read/write version)
71 - size of device at time of snapshot
72 - the (*) indicates this is the active version
73
74 5) Rollback to snapshot.
75
76 Usage: <blkdev id> <snapname>
77
78 $ echo "0 mysnap" > /sys/class/rbd/snap_rollback
79
80
81 6) Mapping an image using snapshot.
82
83 A snapshot mapping is read-only. This is being done by passing
84 snap=<snapname> to the options when adding a device.
85
86 $ echo "192.168.0.1 name=admin,snap=mysnap rbd foo" > /sys/class/rbd/add
87
88
89 7) Remove an active blkdev<->rbd image mapping.
90
91 In this example, we remove the mapping with blkdev unique id 1.
92
93 $ echo 1 > /sys/class/rbd/remove
94
95
96 NOTE: The actual creation and deletion of rados objects is outside the scope
97 of this driver.
98 27
99 */ 28 */
100 29
@@ -163,6 +92,14 @@ struct rbd_request {
163 u64 len; 92 u64 len;
164}; 93};
165 94
95struct rbd_snap {
96 struct device dev;
97 const char *name;
98 size_t size;
99 struct list_head node;
100 u64 id;
101};
102
166/* 103/*
167 * a single device 104 * a single device
168 */ 105 */
@@ -193,21 +130,60 @@ struct rbd_device {
193 int read_only; 130 int read_only;
194 131
195 struct list_head node; 132 struct list_head node;
133
134 /* list of snapshots */
135 struct list_head snaps;
136
137 /* sysfs related */
138 struct device dev;
139};
140
141static struct bus_type rbd_bus_type = {
142 .name = "rbd",
196}; 143};
197 144
198static spinlock_t node_lock; /* protects client get/put */ 145static spinlock_t node_lock; /* protects client get/put */
199 146
200static struct class *class_rbd; /* /sys/class/rbd */
201static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ 147static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */
202static LIST_HEAD(rbd_dev_list); /* devices */ 148static LIST_HEAD(rbd_dev_list); /* devices */
203static LIST_HEAD(rbd_client_list); /* clients */ 149static LIST_HEAD(rbd_client_list); /* clients */
204 150
151static int __rbd_init_snaps_header(struct rbd_device *rbd_dev);
152static void rbd_dev_release(struct device *dev);
153static ssize_t rbd_snap_rollback(struct device *dev,
154 struct device_attribute *attr,
155 const char *buf,
156 size_t size);
157static ssize_t rbd_snap_add(struct device *dev,
158 struct device_attribute *attr,
159 const char *buf,
160 size_t count);
161static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
162 struct rbd_snap *snap);;
163
164
165static struct rbd_device *dev_to_rbd(struct device *dev)
166{
167 return container_of(dev, struct rbd_device, dev);
168}
169
170static struct device *rbd_get_dev(struct rbd_device *rbd_dev)
171{
172 return get_device(&rbd_dev->dev);
173}
174
175static void rbd_put_dev(struct rbd_device *rbd_dev)
176{
177 put_device(&rbd_dev->dev);
178}
205 179
206static int rbd_open(struct block_device *bdev, fmode_t mode) 180static int rbd_open(struct block_device *bdev, fmode_t mode)
207{ 181{
208 struct gendisk *disk = bdev->bd_disk; 182 struct gendisk *disk = bdev->bd_disk;
209 struct rbd_device *rbd_dev = disk->private_data; 183 struct rbd_device *rbd_dev = disk->private_data;
210 184
185 rbd_get_dev(rbd_dev);
186
211 set_device_ro(bdev, rbd_dev->read_only); 187 set_device_ro(bdev, rbd_dev->read_only);
212 188
213 if ((mode & FMODE_WRITE) && rbd_dev->read_only) 189 if ((mode & FMODE_WRITE) && rbd_dev->read_only)
@@ -216,9 +192,19 @@ static int rbd_open(struct block_device *bdev, fmode_t mode)
216 return 0; 192 return 0;
217} 193}
218 194
195static int rbd_release(struct gendisk *disk, fmode_t mode)
196{
197 struct rbd_device *rbd_dev = disk->private_data;
198
199 rbd_put_dev(rbd_dev);
200
201 return 0;
202}
203
219static const struct block_device_operations rbd_bd_ops = { 204static const struct block_device_operations rbd_bd_ops = {
220 .owner = THIS_MODULE, 205 .owner = THIS_MODULE,
221 .open = rbd_open, 206 .open = rbd_open,
207 .release = rbd_release,
222}; 208};
223 209
224/* 210/*
@@ -361,7 +347,6 @@ static int rbd_header_from_disk(struct rbd_image_header *header,
361 int ret = -ENOMEM; 347 int ret = -ENOMEM;
362 348
363 init_rwsem(&header->snap_rwsem); 349 init_rwsem(&header->snap_rwsem);
364
365 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len); 350 header->snap_names_len = le64_to_cpu(ondisk->snap_names_len);
366 header->snapc = kmalloc(sizeof(struct ceph_snap_context) + 351 header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
367 snap_count * 352 snap_count *
@@ -1256,10 +1241,20 @@ bad:
1256 return -ERANGE; 1241 return -ERANGE;
1257} 1242}
1258 1243
1244static void __rbd_remove_all_snaps(struct rbd_device *rbd_dev)
1245{
1246 struct rbd_snap *snap;
1247
1248 while (!list_empty(&rbd_dev->snaps)) {
1249 snap = list_first_entry(&rbd_dev->snaps, struct rbd_snap, node);
1250 __rbd_remove_snap_dev(rbd_dev, snap);
1251 }
1252}
1253
1259/* 1254/*
1260 * only read the first part of the ondisk header, without the snaps info 1255 * only read the first part of the ondisk header, without the snaps info
1261 */ 1256 */
1262static int rbd_update_snaps(struct rbd_device *rbd_dev) 1257static int __rbd_update_snaps(struct rbd_device *rbd_dev)
1263{ 1258{
1264 int ret; 1259 int ret;
1265 struct rbd_image_header h; 1260 struct rbd_image_header h;
@@ -1280,12 +1275,15 @@ static int rbd_update_snaps(struct rbd_device *rbd_dev)
1280 rbd_dev->header.total_snaps = h.total_snaps; 1275 rbd_dev->header.total_snaps = h.total_snaps;
1281 rbd_dev->header.snapc = h.snapc; 1276 rbd_dev->header.snapc = h.snapc;
1282 rbd_dev->header.snap_names = h.snap_names; 1277 rbd_dev->header.snap_names = h.snap_names;
1278 rbd_dev->header.snap_names_len = h.snap_names_len;
1283 rbd_dev->header.snap_sizes = h.snap_sizes; 1279 rbd_dev->header.snap_sizes = h.snap_sizes;
1284 rbd_dev->header.snapc->seq = snap_seq; 1280 rbd_dev->header.snapc->seq = snap_seq;
1285 1281
1282 ret = __rbd_init_snaps_header(rbd_dev);
1283
1286 up_write(&rbd_dev->header.snap_rwsem); 1284 up_write(&rbd_dev->header.snap_rwsem);
1287 1285
1288 return 0; 1286 return ret;
1289} 1287}
1290 1288
1291static int rbd_init_disk(struct rbd_device *rbd_dev) 1289static int rbd_init_disk(struct rbd_device *rbd_dev)
@@ -1300,6 +1298,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
1300 if (rc) 1298 if (rc)
1301 return rc; 1299 return rc;
1302 1300
1301 /* no need to lock here, as rbd_dev is not registered yet */
1302 rc = __rbd_init_snaps_header(rbd_dev);
1303 if (rc)
1304 return rc;
1305
1303 rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size); 1306 rc = rbd_header_set_snap(rbd_dev, rbd_dev->snap_name, &total_size);
1304 if (rc) 1307 if (rc)
1305 return rc; 1308 return rc;
@@ -1343,54 +1346,360 @@ out:
1343 return rc; 1346 return rc;
1344} 1347}
1345 1348
1346/******************************************************************** 1349/*
1347 * /sys/class/rbd/ 1350 sysfs
1348 * add map rados objects to blkdev 1351*/
1349 * remove unmap rados objects 1352
1350 * list show mappings 1353static ssize_t rbd_size_show(struct device *dev,
1351 *******************************************************************/ 1354 struct device_attribute *attr, char *buf)
1355{
1356 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1357
1358 return sprintf(buf, "%llu\n", (unsigned long long)rbd_dev->header.image_size);
1359}
1360
1361static ssize_t rbd_major_show(struct device *dev,
1362 struct device_attribute *attr, char *buf)
1363{
1364 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1352 1365
1353static void class_rbd_release(struct class *cls) 1366 return sprintf(buf, "%d\n", rbd_dev->major);
1367}
1368
1369static ssize_t rbd_client_id_show(struct device *dev,
1370 struct device_attribute *attr, char *buf)
1354{ 1371{
1355 kfree(cls); 1372 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1373
1374 return sprintf(buf, "client%lld\n", ceph_client_id(rbd_dev->client));
1356} 1375}
1357 1376
1358static ssize_t class_rbd_list(struct class *c, 1377static ssize_t rbd_pool_show(struct device *dev,
1359 struct class_attribute *attr, 1378 struct device_attribute *attr, char *buf)
1360 char *data)
1361{ 1379{
1362 int n = 0; 1380 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1363 struct list_head *tmp; 1381
1364 int max = PAGE_SIZE; 1382 return sprintf(buf, "%s\n", rbd_dev->pool_name);
1383}
1384
1385static ssize_t rbd_name_show(struct device *dev,
1386 struct device_attribute *attr, char *buf)
1387{
1388 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1389
1390 return sprintf(buf, "%s\n", rbd_dev->obj);
1391}
1392
1393static ssize_t rbd_snap_show(struct device *dev,
1394 struct device_attribute *attr,
1395 char *buf)
1396{
1397 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1398
1399 return sprintf(buf, "%s\n", rbd_dev->snap_name);
1400}
1401
1402static ssize_t rbd_image_refresh(struct device *dev,
1403 struct device_attribute *attr,
1404 const char *buf,
1405 size_t size)
1406{
1407 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1408 int rc;
1409 int ret = size;
1365 1410
1366 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1411 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1367 1412
1368 n += snprintf(data, max, 1413 rc = __rbd_update_snaps(rbd_dev);
1369 "#id\tmajor\tclient_name\tpool\tname\tsnap\tKB\n"); 1414 if (rc < 0)
1415 ret = rc;
1370 1416
1371 list_for_each(tmp, &rbd_dev_list) { 1417 mutex_unlock(&ctl_mutex);
1372 struct rbd_device *rbd_dev; 1418 return ret;
1419}
1373 1420
1374 rbd_dev = list_entry(tmp, struct rbd_device, node); 1421static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
1375 n += snprintf(data+n, max-n, 1422static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
1376 "%d\t%d\tclient%lld\t%s\t%s\t%s\t%lld\n", 1423static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
1377 rbd_dev->id, 1424static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
1378 rbd_dev->major, 1425static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
1379 ceph_client_id(rbd_dev->client), 1426static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
1380 rbd_dev->pool_name, 1427static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
1381 rbd_dev->obj, rbd_dev->snap_name, 1428static DEVICE_ATTR(create_snap, S_IWUSR, NULL, rbd_snap_add);
1382 rbd_dev->header.image_size >> 10); 1429static DEVICE_ATTR(rollback_snap, S_IWUSR, NULL, rbd_snap_rollback);
1383 if (n == max) 1430
1431static struct attribute *rbd_attrs[] = {
1432 &dev_attr_size.attr,
1433 &dev_attr_major.attr,
1434 &dev_attr_client_id.attr,
1435 &dev_attr_pool.attr,
1436 &dev_attr_name.attr,
1437 &dev_attr_current_snap.attr,
1438 &dev_attr_refresh.attr,
1439 &dev_attr_create_snap.attr,
1440 &dev_attr_rollback_snap.attr,
1441 NULL
1442};
1443
1444static struct attribute_group rbd_attr_group = {
1445 .attrs = rbd_attrs,
1446};
1447
1448static const struct attribute_group *rbd_attr_groups[] = {
1449 &rbd_attr_group,
1450 NULL
1451};
1452
1453static void rbd_sysfs_dev_release(struct device *dev)
1454{
1455}
1456
1457static struct device_type rbd_device_type = {
1458 .name = "rbd",
1459 .groups = rbd_attr_groups,
1460 .release = rbd_sysfs_dev_release,
1461};
1462
1463
1464/*
1465 sysfs - snapshots
1466*/
1467
1468static ssize_t rbd_snap_size_show(struct device *dev,
1469 struct device_attribute *attr,
1470 char *buf)
1471{
1472 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1473
1474 return sprintf(buf, "%lld\n", (long long)snap->size);
1475}
1476
1477static ssize_t rbd_snap_id_show(struct device *dev,
1478 struct device_attribute *attr,
1479 char *buf)
1480{
1481 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1482
1483 return sprintf(buf, "%lld\n", (long long)snap->id);
1484}
1485
1486static DEVICE_ATTR(snap_size, S_IRUGO, rbd_snap_size_show, NULL);
1487static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
1488
1489static struct attribute *rbd_snap_attrs[] = {
1490 &dev_attr_snap_size.attr,
1491 &dev_attr_snap_id.attr,
1492 NULL,
1493};
1494
1495static struct attribute_group rbd_snap_attr_group = {
1496 .attrs = rbd_snap_attrs,
1497};
1498
1499static void rbd_snap_dev_release(struct device *dev)
1500{
1501 struct rbd_snap *snap = container_of(dev, struct rbd_snap, dev);
1502 kfree(snap->name);
1503 kfree(snap);
1504}
1505
1506static const struct attribute_group *rbd_snap_attr_groups[] = {
1507 &rbd_snap_attr_group,
1508 NULL
1509};
1510
1511static struct device_type rbd_snap_device_type = {
1512 .groups = rbd_snap_attr_groups,
1513 .release = rbd_snap_dev_release,
1514};
1515
1516static void __rbd_remove_snap_dev(struct rbd_device *rbd_dev,
1517 struct rbd_snap *snap)
1518{
1519 list_del(&snap->node);
1520 device_unregister(&snap->dev);
1521}
1522
1523static int rbd_register_snap_dev(struct rbd_device *rbd_dev,
1524 struct rbd_snap *snap,
1525 struct device *parent)
1526{
1527 struct device *dev = &snap->dev;
1528 int ret;
1529
1530 dev->type = &rbd_snap_device_type;
1531 dev->parent = parent;
1532 dev->release = rbd_snap_dev_release;
1533 dev_set_name(dev, "snap_%s", snap->name);
1534 ret = device_register(dev);
1535
1536 return ret;
1537}
1538
1539static int __rbd_add_snap_dev(struct rbd_device *rbd_dev,
1540 int i, const char *name,
1541 struct rbd_snap **snapp)
1542{
1543 int ret;
1544 struct rbd_snap *snap = kzalloc(sizeof(*snap), GFP_KERNEL);
1545 if (!snap)
1546 return -ENOMEM;
1547 snap->name = kstrdup(name, GFP_KERNEL);
1548 snap->size = rbd_dev->header.snap_sizes[i];
1549 snap->id = rbd_dev->header.snapc->snaps[i];
1550 if (device_is_registered(&rbd_dev->dev)) {
1551 ret = rbd_register_snap_dev(rbd_dev, snap,
1552 &rbd_dev->dev);
1553 if (ret < 0)
1554 goto err;
1555 }
1556 *snapp = snap;
1557 return 0;
1558err:
1559 kfree(snap->name);
1560 kfree(snap);
1561 return ret;
1562}
1563
1564/*
1565 * search for the previous snap in a null delimited string list
1566 */
1567const char *rbd_prev_snap_name(const char *name, const char *start)
1568{
1569 if (name < start + 2)
1570 return NULL;
1571
1572 name -= 2;
1573 while (*name) {
1574 if (name == start)
1575 return start;
1576 name--;
1577 }
1578 return name + 1;
1579}
1580
1581/*
1582 * compare the old list of snapshots that we have to what's in the header
1583 * and update it accordingly. Note that the header holds the snapshots
1584 * in a reverse order (from newest to oldest) and we need to go from
1585 * older to new so that we don't get a duplicate snap name when
1586 * doing the process (e.g., removed snapshot and recreated a new
1587 * one with the same name.
1588 */
1589static int __rbd_init_snaps_header(struct rbd_device *rbd_dev)
1590{
1591 const char *name, *first_name;
1592 int i = rbd_dev->header.total_snaps;
1593 struct rbd_snap *snap, *old_snap = NULL;
1594 int ret;
1595 struct list_head *p, *n;
1596
1597 first_name = rbd_dev->header.snap_names;
1598 name = first_name + rbd_dev->header.snap_names_len;
1599
1600 list_for_each_prev_safe(p, n, &rbd_dev->snaps) {
1601 u64 cur_id;
1602
1603 old_snap = list_entry(p, struct rbd_snap, node);
1604
1605 if (i)
1606 cur_id = rbd_dev->header.snapc->snaps[i - 1];
1607
1608 if (!i || old_snap->id < cur_id) {
1609 /* old_snap->id was skipped, thus was removed */
1610 __rbd_remove_snap_dev(rbd_dev, old_snap);
1611 continue;
1612 }
1613 if (old_snap->id == cur_id) {
1614 /* we have this snapshot already */
1615 i--;
1616 name = rbd_prev_snap_name(name, first_name);
1617 continue;
1618 }
1619 for (; i > 0;
1620 i--, name = rbd_prev_snap_name(name, first_name)) {
1621 if (!name) {
1622 WARN_ON(1);
1623 return -EINVAL;
1624 }
1625 cur_id = rbd_dev->header.snapc->snaps[i];
1626 /* snapshot removal? handle it above */
1627 if (cur_id >= old_snap->id)
1628 break;
1629 /* a new snapshot */
1630 ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
1631 if (ret < 0)
1632 return ret;
1633
1634 /* note that we add it backward so using n and not p */
1635 list_add(&snap->node, n);
1636 p = &snap->node;
1637 }
1638 }
1639 /* we're done going over the old snap list, just add what's left */
1640 for (; i > 0; i--) {
1641 name = rbd_prev_snap_name(name, first_name);
1642 if (!name) {
1643 WARN_ON(1);
1644 return -EINVAL;
1645 }
1646 ret = __rbd_add_snap_dev(rbd_dev, i - 1, name, &snap);
1647 if (ret < 0)
1648 return ret;
1649 list_add(&snap->node, &rbd_dev->snaps);
1650 }
1651
1652 return 0;
1653}
1654
1655
1656static void rbd_root_dev_release(struct device *dev)
1657{
1658}
1659
1660static struct device rbd_root_dev = {
1661 .init_name = "rbd",
1662 .release = rbd_root_dev_release,
1663};
1664
1665static int rbd_bus_add_dev(struct rbd_device *rbd_dev)
1666{
1667 int ret = -ENOMEM;
1668 struct device *dev;
1669 struct rbd_snap *snap;
1670
1671 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1672 dev = &rbd_dev->dev;
1673
1674 dev->bus = &rbd_bus_type;
1675 dev->type = &rbd_device_type;
1676 dev->parent = &rbd_root_dev;
1677 dev->release = rbd_dev_release;
1678 dev_set_name(dev, "%d", rbd_dev->id);
1679 ret = device_register(dev);
1680 if (ret < 0)
1681 goto done_free;
1682
1683 list_for_each_entry(snap, &rbd_dev->snaps, node) {
1684 ret = rbd_register_snap_dev(rbd_dev, snap,
1685 &rbd_dev->dev);
1686 if (ret < 0)
1384 break; 1687 break;
1385 } 1688 }
1386 1689
1387 mutex_unlock(&ctl_mutex); 1690 mutex_unlock(&ctl_mutex);
1388 return n; 1691 return 0;
1692done_free:
1693 mutex_unlock(&ctl_mutex);
1694 return ret;
1389} 1695}
1390 1696
1391static ssize_t class_rbd_add(struct class *c, 1697static void rbd_bus_del_dev(struct rbd_device *rbd_dev)
1392 struct class_attribute *attr, 1698{
1393 const char *buf, size_t count) 1699 device_unregister(&rbd_dev->dev);
1700}
1701
1702static ssize_t rbd_add(struct bus_type *bus, const char *buf, size_t count)
1394{ 1703{
1395 struct ceph_osd_client *osdc; 1704 struct ceph_osd_client *osdc;
1396 struct rbd_device *rbd_dev; 1705 struct rbd_device *rbd_dev;
@@ -1419,6 +1728,7 @@ static ssize_t class_rbd_add(struct class *c,
1419 /* static rbd_device initialization */ 1728 /* static rbd_device initialization */
1420 spin_lock_init(&rbd_dev->lock); 1729 spin_lock_init(&rbd_dev->lock);
1421 INIT_LIST_HEAD(&rbd_dev->node); 1730 INIT_LIST_HEAD(&rbd_dev->node);
1731 INIT_LIST_HEAD(&rbd_dev->snaps);
1422 1732
1423 /* generate unique id: find highest unique id, add one */ 1733 /* generate unique id: find highest unique id, add one */
1424 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1734 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
@@ -1478,6 +1788,9 @@ static ssize_t class_rbd_add(struct class *c,
1478 } 1788 }
1479 rbd_dev->major = irc; 1789 rbd_dev->major = irc;
1480 1790
1791 rc = rbd_bus_add_dev(rbd_dev);
1792 if (rc)
1793 goto err_out_disk;
1481 /* set up and announce blkdev mapping */ 1794 /* set up and announce blkdev mapping */
1482 rc = rbd_init_disk(rbd_dev); 1795 rc = rbd_init_disk(rbd_dev);
1483 if (rc) 1796 if (rc)
@@ -1487,6 +1800,8 @@ static ssize_t class_rbd_add(struct class *c,
1487 1800
1488err_out_blkdev: 1801err_out_blkdev:
1489 unregister_blkdev(rbd_dev->major, rbd_dev->name); 1802 unregister_blkdev(rbd_dev->major, rbd_dev->name);
1803err_out_disk:
1804 rbd_free_disk(rbd_dev);
1490err_out_client: 1805err_out_client:
1491 rbd_put_client(rbd_dev); 1806 rbd_put_client(rbd_dev);
1492 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1807 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
@@ -1518,35 +1833,10 @@ static struct rbd_device *__rbd_get_dev(unsigned long id)
1518 return NULL; 1833 return NULL;
1519} 1834}
1520 1835
1521static ssize_t class_rbd_remove(struct class *c, 1836static void rbd_dev_release(struct device *dev)
1522 struct class_attribute *attr,
1523 const char *buf,
1524 size_t count)
1525{ 1837{
1526 struct rbd_device *rbd_dev = NULL; 1838 struct rbd_device *rbd_dev =
1527 int target_id, rc; 1839 container_of(dev, struct rbd_device, dev);
1528 unsigned long ul;
1529
1530 rc = strict_strtoul(buf, 10, &ul);
1531 if (rc)
1532 return rc;
1533
1534 /* convert to int; abort if we lost anything in the conversion */
1535 target_id = (int) ul;
1536 if (target_id != ul)
1537 return -EINVAL;
1538
1539 /* remove object from list immediately */
1540 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1541
1542 rbd_dev = __rbd_get_dev(target_id);
1543 if (rbd_dev)
1544 list_del_init(&rbd_dev->node);
1545
1546 mutex_unlock(&ctl_mutex);
1547
1548 if (!rbd_dev)
1549 return -ENOENT;
1550 1840
1551 rbd_put_client(rbd_dev); 1841 rbd_put_client(rbd_dev);
1552 1842
@@ -1557,67 +1847,11 @@ static ssize_t class_rbd_remove(struct class *c,
1557 1847
1558 /* release module ref */ 1848 /* release module ref */
1559 module_put(THIS_MODULE); 1849 module_put(THIS_MODULE);
1560
1561 return count;
1562} 1850}
1563 1851
1564static ssize_t class_rbd_snaps_list(struct class *c, 1852static ssize_t rbd_remove(struct bus_type *bus,
1565 struct class_attribute *attr, 1853 const char *buf,
1566 char *data) 1854 size_t count)
1567{
1568 struct rbd_device *rbd_dev = NULL;
1569 struct list_head *tmp;
1570 struct rbd_image_header *header;
1571 int i, n = 0, max = PAGE_SIZE;
1572 int ret;
1573
1574 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1575
1576 n += snprintf(data, max, "#id\tsnap\tKB\n");
1577
1578 list_for_each(tmp, &rbd_dev_list) {
1579 char *names, *p;
1580 struct ceph_snap_context *snapc;
1581
1582 rbd_dev = list_entry(tmp, struct rbd_device, node);
1583 header = &rbd_dev->header;
1584
1585 down_read(&header->snap_rwsem);
1586
1587 names = header->snap_names;
1588 snapc = header->snapc;
1589
1590 n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
1591 rbd_dev->id, RBD_SNAP_HEAD_NAME,
1592 header->image_size >> 10,
1593 (!rbd_dev->cur_snap ? " (*)" : ""));
1594 if (n == max)
1595 break;
1596
1597 p = names;
1598 for (i = 0; i < header->total_snaps; i++, p += strlen(p) + 1) {
1599 n += snprintf(data + n, max - n, "%d\t%s\t%lld%s\n",
1600 rbd_dev->id, p, header->snap_sizes[i] >> 10,
1601 (rbd_dev->cur_snap &&
1602 (snap_index(header, i) == rbd_dev->cur_snap) ?
1603 " (*)" : ""));
1604 if (n == max)
1605 break;
1606 }
1607
1608 up_read(&header->snap_rwsem);
1609 }
1610
1611
1612 ret = n;
1613 mutex_unlock(&ctl_mutex);
1614 return ret;
1615}
1616
1617static ssize_t class_rbd_snaps_refresh(struct class *c,
1618 struct class_attribute *attr,
1619 const char *buf,
1620 size_t count)
1621{ 1855{
1622 struct rbd_device *rbd_dev = NULL; 1856 struct rbd_device *rbd_dev = NULL;
1623 int target_id, rc; 1857 int target_id, rc;
@@ -1641,95 +1875,70 @@ static ssize_t class_rbd_snaps_refresh(struct class *c,
1641 goto done; 1875 goto done;
1642 } 1876 }
1643 1877
1644 rc = rbd_update_snaps(rbd_dev); 1878 list_del_init(&rbd_dev->node);
1645 if (rc < 0) 1879
1646 ret = rc; 1880 __rbd_remove_all_snaps(rbd_dev);
1881 rbd_bus_del_dev(rbd_dev);
1647 1882
1648done: 1883done:
1649 mutex_unlock(&ctl_mutex); 1884 mutex_unlock(&ctl_mutex);
1650 return ret; 1885 return ret;
1651} 1886}
1652 1887
1653static ssize_t class_rbd_snap_create(struct class *c, 1888static ssize_t rbd_snap_add(struct device *dev,
1654 struct class_attribute *attr, 1889 struct device_attribute *attr,
1655 const char *buf, 1890 const char *buf,
1656 size_t count) 1891 size_t count)
1657{ 1892{
1658 struct rbd_device *rbd_dev = NULL; 1893 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1659 int target_id, ret; 1894 int ret;
1660 char *name; 1895 char *name = kmalloc(count + 1, GFP_KERNEL);
1661
1662 name = kmalloc(RBD_MAX_SNAP_NAME_LEN + 1, GFP_KERNEL);
1663 if (!name) 1896 if (!name)
1664 return -ENOMEM; 1897 return -ENOMEM;
1665 1898
1666 /* parse snaps add command */ 1899 snprintf(name, count, "%s", buf);
1667 if (sscanf(buf, "%d "
1668 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
1669 &target_id,
1670 name) != 2) {
1671 ret = -EINVAL;
1672 goto done;
1673 }
1674 1900
1675 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1901 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1676 1902
1677 rbd_dev = __rbd_get_dev(target_id);
1678 if (!rbd_dev) {
1679 ret = -ENOENT;
1680 goto done_unlock;
1681 }
1682
1683 ret = rbd_header_add_snap(rbd_dev, 1903 ret = rbd_header_add_snap(rbd_dev,
1684 name, GFP_KERNEL); 1904 name, GFP_KERNEL);
1685 if (ret < 0) 1905 if (ret < 0)
1686 goto done_unlock; 1906 goto done_unlock;
1687 1907
1688 ret = rbd_update_snaps(rbd_dev); 1908 ret = __rbd_update_snaps(rbd_dev);
1689 if (ret < 0) 1909 if (ret < 0)
1690 goto done_unlock; 1910 goto done_unlock;
1691 1911
1692 ret = count; 1912 ret = count;
1693done_unlock: 1913done_unlock:
1694 mutex_unlock(&ctl_mutex); 1914 mutex_unlock(&ctl_mutex);
1695done:
1696 kfree(name); 1915 kfree(name);
1697 return ret; 1916 return ret;
1698} 1917}
1699 1918
1700static ssize_t class_rbd_rollback(struct class *c, 1919static ssize_t rbd_snap_rollback(struct device *dev,
1701 struct class_attribute *attr, 1920 struct device_attribute *attr,
1702 const char *buf, 1921 const char *buf,
1703 size_t count) 1922 size_t count)
1704{ 1923{
1705 struct rbd_device *rbd_dev = NULL; 1924 struct rbd_device *rbd_dev = dev_to_rbd(dev);
1706 int target_id, ret; 1925 int ret;
1707 u64 snapid; 1926 u64 snapid;
1708 char snap_name[RBD_MAX_SNAP_NAME_LEN];
1709 u64 cur_ofs; 1927 u64 cur_ofs;
1710 char *seg_name; 1928 char *seg_name = NULL;
1929 char *snap_name = kmalloc(count + 1, GFP_KERNEL);
1930 ret = -ENOMEM;
1931 if (!snap_name)
1932 return ret;
1711 1933
1712 /* parse snaps add command */ 1934 /* parse snaps add command */
1713 if (sscanf(buf, "%d " 1935 snprintf(snap_name, count, "%s", buf);
1714 "%" __stringify(RBD_MAX_SNAP_NAME_LEN) "s",
1715 &target_id,
1716 snap_name) != 2) {
1717 return -EINVAL;
1718 }
1719
1720 ret = -ENOMEM;
1721 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO); 1936 seg_name = kmalloc(RBD_MAX_SEG_NAME_LEN + 1, GFP_NOIO);
1722 if (!seg_name) 1937 if (!seg_name)
1723 return ret; 1938 goto done;
1724 1939
1725 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 1940 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
1726 1941
1727 rbd_dev = __rbd_get_dev(target_id);
1728 if (!rbd_dev) {
1729 ret = -ENOENT;
1730 goto done_unlock;
1731 }
1732
1733 ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL); 1942 ret = snap_by_name(&rbd_dev->header, snap_name, &snapid, NULL);
1734 if (ret < 0) 1943 if (ret < 0)
1735 goto done_unlock; 1944 goto done_unlock;
@@ -1750,7 +1959,7 @@ static ssize_t class_rbd_rollback(struct class *c,
1750 seg_name, ret); 1959 seg_name, ret);
1751 } 1960 }
1752 1961
1753 ret = rbd_update_snaps(rbd_dev); 1962 ret = __rbd_update_snaps(rbd_dev);
1754 if (ret < 0) 1963 if (ret < 0)
1755 goto done_unlock; 1964 goto done_unlock;
1756 1965
@@ -1758,57 +1967,42 @@ static ssize_t class_rbd_rollback(struct class *c,
1758 1967
1759done_unlock: 1968done_unlock:
1760 mutex_unlock(&ctl_mutex); 1969 mutex_unlock(&ctl_mutex);
1970done:
1761 kfree(seg_name); 1971 kfree(seg_name);
1972 kfree(snap_name);
1762 1973
1763 return ret; 1974 return ret;
1764} 1975}
1765 1976
1766static struct class_attribute class_rbd_attrs[] = { 1977static struct bus_attribute rbd_bus_attrs[] = {
1767 __ATTR(add, 0200, NULL, class_rbd_add), 1978 __ATTR(add, S_IWUSR, NULL, rbd_add),
1768 __ATTR(remove, 0200, NULL, class_rbd_remove), 1979 __ATTR(remove, S_IWUSR, NULL, rbd_remove),
1769 __ATTR(list, 0444, class_rbd_list, NULL),
1770 __ATTR(snaps_refresh, 0200, NULL, class_rbd_snaps_refresh),
1771 __ATTR(snap_create, 0200, NULL, class_rbd_snap_create),
1772 __ATTR(snaps_list, 0444, class_rbd_snaps_list, NULL),
1773 __ATTR(snap_rollback, 0200, NULL, class_rbd_rollback),
1774 __ATTR_NULL 1980 __ATTR_NULL
1775}; 1981};
1776 1982
1777/* 1983/*
1778 * create control files in sysfs 1984 * create control files in sysfs
1779 * /sys/class/rbd/... 1985 * /sys/bus/rbd/...
1780 */ 1986 */
1781static int rbd_sysfs_init(void) 1987static int rbd_sysfs_init(void)
1782{ 1988{
1783 int ret = -ENOMEM; 1989 int ret;
1784 1990
1785 class_rbd = kzalloc(sizeof(*class_rbd), GFP_KERNEL); 1991 rbd_bus_type.bus_attrs = rbd_bus_attrs;
1786 if (!class_rbd)
1787 goto out;
1788 1992
1789 class_rbd->name = DRV_NAME; 1993 ret = bus_register(&rbd_bus_type);
1790 class_rbd->owner = THIS_MODULE; 1994 if (ret < 0)
1791 class_rbd->class_release = class_rbd_release; 1995 return ret;
1792 class_rbd->class_attrs = class_rbd_attrs;
1793 1996
1794 ret = class_register(class_rbd); 1997 ret = device_register(&rbd_root_dev);
1795 if (ret)
1796 goto out_class;
1797 return 0;
1798 1998
1799out_class:
1800 kfree(class_rbd);
1801 class_rbd = NULL;
1802 pr_err(DRV_NAME ": failed to create class rbd\n");
1803out:
1804 return ret; 1999 return ret;
1805} 2000}
1806 2001
1807static void rbd_sysfs_cleanup(void) 2002static void rbd_sysfs_cleanup(void)
1808{ 2003{
1809 if (class_rbd) 2004 device_unregister(&rbd_root_dev);
1810 class_destroy(class_rbd); 2005 bus_unregister(&rbd_bus_type);
1811 class_rbd = NULL;
1812} 2006}
1813 2007
1814int __init rbd_init(void) 2008int __init rbd_init(void)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 255035cfc88a..657873e4328d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,14 +65,14 @@ enum blkif_state {
65 65
66struct blk_shadow { 66struct blk_shadow {
67 struct blkif_request req; 67 struct blkif_request req;
68 unsigned long request; 68 struct request *request;
69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 69 unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
70}; 70};
71 71
72static DEFINE_MUTEX(blkfront_mutex); 72static DEFINE_MUTEX(blkfront_mutex);
73static const struct block_device_operations xlvbd_block_fops; 73static const struct block_device_operations xlvbd_block_fops;
74 74
75#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE) 75#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
76 76
77/* 77/*
78 * We have one of these per vbd, whether ide, scsi or 'other'. They 78 * We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info,
136 unsigned long id) 136 unsigned long id)
137{ 137{
138 info->shadow[id].req.id = info->shadow_free; 138 info->shadow[id].req.id = info->shadow_free;
139 info->shadow[id].request = 0; 139 info->shadow[id].request = NULL;
140 info->shadow_free = id; 140 info->shadow_free = id;
141} 141}
142 142
@@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
245} 245}
246 246
247/* 247/*
248 * blkif_queue_request 248 * Generate a Xen blkfront IO request from a blk layer request. Reads
249 * and writes are handled as expected. Since we lack a loose flush
250 * request, we map flushes into a full ordered barrier.
249 * 251 *
250 * request block io 252 * @req: a request struct
251 *
252 * id: for guest use only.
253 * operation: BLKIF_OP_{READ,WRITE,PROBE}
254 * buffer: buffer to read/write into. this should be a
255 * virtual address in the guest os.
256 */ 253 */
257static int blkif_queue_request(struct request *req) 254static int blkif_queue_request(struct request *req)
258{ 255{
@@ -281,7 +278,7 @@ static int blkif_queue_request(struct request *req)
281 /* Fill out a communications ring structure. */ 278 /* Fill out a communications ring structure. */
282 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); 279 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
283 id = get_id_from_freelist(info); 280 id = get_id_from_freelist(info);
284 info->shadow[id].request = (unsigned long)req; 281 info->shadow[id].request = req;
285 282
286 ring_req->id = id; 283 ring_req->id = id;
287 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req); 284 ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -290,6 +287,18 @@ static int blkif_queue_request(struct request *req)
290 ring_req->operation = rq_data_dir(req) ? 287 ring_req->operation = rq_data_dir(req) ?
291 BLKIF_OP_WRITE : BLKIF_OP_READ; 288 BLKIF_OP_WRITE : BLKIF_OP_READ;
292 289
290 if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
291 /*
292 * Ideally we could just do an unordered
293 * flush-to-disk, but all we have is a full write
294 * barrier at the moment. However, a barrier write is
295 * a superset of FUA, so we can implement it the same
296 * way. (It's also a FLUSH+FUA, since it is
297 * guaranteed ordered WRT previous writes.)
298 */
299 ring_req->operation = BLKIF_OP_WRITE_BARRIER;
300 }
301
293 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); 302 ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
294 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); 303 BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
295 304
@@ -634,7 +643,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
634 643
635 bret = RING_GET_RESPONSE(&info->ring, i); 644 bret = RING_GET_RESPONSE(&info->ring, i);
636 id = bret->id; 645 id = bret->id;
637 req = (struct request *)info->shadow[id].request; 646 req = info->shadow[id].request;
638 647
639 blkif_completion(&info->shadow[id]); 648 blkif_completion(&info->shadow[id]);
640 649
@@ -647,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
647 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n", 656 printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
648 info->gd->disk_name); 657 info->gd->disk_name);
649 error = -EOPNOTSUPP; 658 error = -EOPNOTSUPP;
659 }
660 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
661 info->shadow[id].req.nr_segments == 0)) {
662 printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
663 info->gd->disk_name);
664 error = -EOPNOTSUPP;
665 }
666 if (unlikely(error)) {
667 if (error == -EOPNOTSUPP)
668 error = 0;
650 info->feature_flush = 0; 669 info->feature_flush = 0;
651 xlvbd_flush(info); 670 xlvbd_flush(info);
652 } 671 }
@@ -899,7 +918,7 @@ static int blkif_recover(struct blkfront_info *info)
899 /* Stage 3: Find pending requests and requeue them. */ 918 /* Stage 3: Find pending requests and requeue them. */
900 for (i = 0; i < BLK_RING_SIZE; i++) { 919 for (i = 0; i < BLK_RING_SIZE; i++) {
901 /* Not in use? */ 920 /* Not in use? */
902 if (copy[i].request == 0) 921 if (!copy[i].request)
903 continue; 922 continue;
904 923
905 /* Grab a request slot and copy shadow state into it. */ 924 /* Grab a request slot and copy shadow state into it. */
@@ -916,9 +935,7 @@ static int blkif_recover(struct blkfront_info *info)
916 req->seg[j].gref, 935 req->seg[j].gref,
917 info->xbdev->otherend_id, 936 info->xbdev->otherend_id,
918 pfn_to_mfn(info->shadow[req->id].frame[j]), 937 pfn_to_mfn(info->shadow[req->id].frame[j]),
919 rq_data_dir( 938 rq_data_dir(info->shadow[req->id].request));
920 (struct request *)
921 info->shadow[req->id].request));
922 info->shadow[req->id].req = *req; 939 info->shadow[req->id].req = *req;
923 940
924 info->ring.req_prod_pvt++; 941 info->ring.req_prod_pvt++;
@@ -1067,14 +1084,8 @@ static void blkfront_connect(struct blkfront_info *info)
1067 */ 1084 */
1068 info->feature_flush = 0; 1085 info->feature_flush = 0;
1069 1086
1070 /*
1071 * The driver doesn't properly handled empty flushes, so
1072 * lets disable barrier support for now.
1073 */
1074#if 0
1075 if (!err && barrier) 1087 if (!err && barrier)
1076 info->feature_flush = REQ_FLUSH; 1088 info->feature_flush = REQ_FLUSH | REQ_FUA;
1077#endif
1078 1089
1079 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); 1090 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
1080 if (err) { 1091 if (err) {