diff options
author | Jens Axboe <axboe@kernel.dk> | 2013-07-02 02:31:48 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2013-07-02 02:31:48 -0400 |
commit | 5f0e5afa0de4522abb3ea7d1369039b94e740ec5 (patch) | |
tree | 6a5be3db9ecfed8ef2150c6146f6d1e0d658ac8b /drivers/block | |
parent | d752b2696072ed52fd5afab08b601e2220a3b87e (diff) | |
parent | 9e895ace5d82df8929b16f58e9f515f6d54ab82d (diff) |
Merge tag 'v3.10-rc7' into for-3.11/drivers
Linux 3.10-rc7
Pull this in early to avoid doing it with the bcache merge,
since there are a number of changes to bcache between my old
base (3.10-rc1) and the new pull request.
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/brd.c | 4 | ||||
-rw-r--r-- | drivers/block/cciss.c | 32 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 8 | ||||
-rw-r--r-- | drivers/block/nvme-core.c | 62 | ||||
-rw-r--r-- | drivers/block/nvme-scsi.c | 3 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 3 | ||||
-rw-r--r-- | drivers/block/rbd.c | 974 | ||||
-rw-r--r-- | drivers/block/xsysace.c | 3 |
8 files changed, 647 insertions, 442 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index f1a29f8e9d33..9bf4371755f2 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -117,13 +117,13 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector) | |||
117 | 117 | ||
118 | spin_lock(&brd->brd_lock); | 118 | spin_lock(&brd->brd_lock); |
119 | idx = sector >> PAGE_SECTORS_SHIFT; | 119 | idx = sector >> PAGE_SECTORS_SHIFT; |
120 | page->index = idx; | ||
120 | if (radix_tree_insert(&brd->brd_pages, idx, page)) { | 121 | if (radix_tree_insert(&brd->brd_pages, idx, page)) { |
121 | __free_page(page); | 122 | __free_page(page); |
122 | page = radix_tree_lookup(&brd->brd_pages, idx); | 123 | page = radix_tree_lookup(&brd->brd_pages, idx); |
123 | BUG_ON(!page); | 124 | BUG_ON(!page); |
124 | BUG_ON(page->index != idx); | 125 | BUG_ON(page->index != idx); |
125 | } else | 126 | } |
126 | page->index = idx; | ||
127 | spin_unlock(&brd->brd_lock); | 127 | spin_unlock(&brd->brd_lock); |
128 | 128 | ||
129 | radix_tree_preload_end(); | 129 | radix_tree_preload_end(); |
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 6374dc103521..62b6c2cc80b5 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -168,8 +168,6 @@ static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id); | |||
168 | static int cciss_open(struct block_device *bdev, fmode_t mode); | 168 | static int cciss_open(struct block_device *bdev, fmode_t mode); |
169 | static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode); | 169 | static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode); |
170 | static void cciss_release(struct gendisk *disk, fmode_t mode); | 170 | static void cciss_release(struct gendisk *disk, fmode_t mode); |
171 | static int do_ioctl(struct block_device *bdev, fmode_t mode, | ||
172 | unsigned int cmd, unsigned long arg); | ||
173 | static int cciss_ioctl(struct block_device *bdev, fmode_t mode, | 171 | static int cciss_ioctl(struct block_device *bdev, fmode_t mode, |
174 | unsigned int cmd, unsigned long arg); | 172 | unsigned int cmd, unsigned long arg); |
175 | static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); | 173 | static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo); |
@@ -235,7 +233,7 @@ static const struct block_device_operations cciss_fops = { | |||
235 | .owner = THIS_MODULE, | 233 | .owner = THIS_MODULE, |
236 | .open = cciss_unlocked_open, | 234 | .open = cciss_unlocked_open, |
237 | .release = cciss_release, | 235 | .release = cciss_release, |
238 | .ioctl = do_ioctl, | 236 | .ioctl = cciss_ioctl, |
239 | .getgeo = cciss_getgeo, | 237 | .getgeo = cciss_getgeo, |
240 | #ifdef CONFIG_COMPAT | 238 | #ifdef CONFIG_COMPAT |
241 | .compat_ioctl = cciss_compat_ioctl, | 239 | .compat_ioctl = cciss_compat_ioctl, |
@@ -1143,16 +1141,6 @@ static void cciss_release(struct gendisk *disk, fmode_t mode) | |||
1143 | mutex_unlock(&cciss_mutex); | 1141 | mutex_unlock(&cciss_mutex); |
1144 | } | 1142 | } |
1145 | 1143 | ||
1146 | static int do_ioctl(struct block_device *bdev, fmode_t mode, | ||
1147 | unsigned cmd, unsigned long arg) | ||
1148 | { | ||
1149 | int ret; | ||
1150 | mutex_lock(&cciss_mutex); | ||
1151 | ret = cciss_ioctl(bdev, mode, cmd, arg); | ||
1152 | mutex_unlock(&cciss_mutex); | ||
1153 | return ret; | ||
1154 | } | ||
1155 | |||
1156 | #ifdef CONFIG_COMPAT | 1144 | #ifdef CONFIG_COMPAT |
1157 | 1145 | ||
1158 | static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, | 1146 | static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, |
@@ -1179,7 +1167,7 @@ static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode, | |||
1179 | case CCISS_REGNEWD: | 1167 | case CCISS_REGNEWD: |
1180 | case CCISS_RESCANDISK: | 1168 | case CCISS_RESCANDISK: |
1181 | case CCISS_GETLUNINFO: | 1169 | case CCISS_GETLUNINFO: |
1182 | return do_ioctl(bdev, mode, cmd, arg); | 1170 | return cciss_ioctl(bdev, mode, cmd, arg); |
1183 | 1171 | ||
1184 | case CCISS_PASSTHRU32: | 1172 | case CCISS_PASSTHRU32: |
1185 | return cciss_ioctl32_passthru(bdev, mode, cmd, arg); | 1173 | return cciss_ioctl32_passthru(bdev, mode, cmd, arg); |
@@ -1219,7 +1207,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode, | |||
1219 | if (err) | 1207 | if (err) |
1220 | return -EFAULT; | 1208 | return -EFAULT; |
1221 | 1209 | ||
1222 | err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p); | 1210 | err = cciss_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p); |
1223 | if (err) | 1211 | if (err) |
1224 | return err; | 1212 | return err; |
1225 | err |= | 1213 | err |= |
@@ -1261,7 +1249,7 @@ static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode, | |||
1261 | if (err) | 1249 | if (err) |
1262 | return -EFAULT; | 1250 | return -EFAULT; |
1263 | 1251 | ||
1264 | err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p); | 1252 | err = cciss_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p); |
1265 | if (err) | 1253 | if (err) |
1266 | return err; | 1254 | return err; |
1267 | err |= | 1255 | err |= |
@@ -1311,11 +1299,14 @@ static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp) | |||
1311 | static int cciss_getintinfo(ctlr_info_t *h, void __user *argp) | 1299 | static int cciss_getintinfo(ctlr_info_t *h, void __user *argp) |
1312 | { | 1300 | { |
1313 | cciss_coalint_struct intinfo; | 1301 | cciss_coalint_struct intinfo; |
1302 | unsigned long flags; | ||
1314 | 1303 | ||
1315 | if (!argp) | 1304 | if (!argp) |
1316 | return -EINVAL; | 1305 | return -EINVAL; |
1306 | spin_lock_irqsave(&h->lock, flags); | ||
1317 | intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay); | 1307 | intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay); |
1318 | intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount); | 1308 | intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount); |
1309 | spin_unlock_irqrestore(&h->lock, flags); | ||
1319 | if (copy_to_user | 1310 | if (copy_to_user |
1320 | (argp, &intinfo, sizeof(cciss_coalint_struct))) | 1311 | (argp, &intinfo, sizeof(cciss_coalint_struct))) |
1321 | return -EFAULT; | 1312 | return -EFAULT; |
@@ -1356,12 +1347,15 @@ static int cciss_setintinfo(ctlr_info_t *h, void __user *argp) | |||
1356 | static int cciss_getnodename(ctlr_info_t *h, void __user *argp) | 1347 | static int cciss_getnodename(ctlr_info_t *h, void __user *argp) |
1357 | { | 1348 | { |
1358 | NodeName_type NodeName; | 1349 | NodeName_type NodeName; |
1350 | unsigned long flags; | ||
1359 | int i; | 1351 | int i; |
1360 | 1352 | ||
1361 | if (!argp) | 1353 | if (!argp) |
1362 | return -EINVAL; | 1354 | return -EINVAL; |
1355 | spin_lock_irqsave(&h->lock, flags); | ||
1363 | for (i = 0; i < 16; i++) | 1356 | for (i = 0; i < 16; i++) |
1364 | NodeName[i] = readb(&h->cfgtable->ServerName[i]); | 1357 | NodeName[i] = readb(&h->cfgtable->ServerName[i]); |
1358 | spin_unlock_irqrestore(&h->lock, flags); | ||
1365 | if (copy_to_user(argp, NodeName, sizeof(NodeName_type))) | 1359 | if (copy_to_user(argp, NodeName, sizeof(NodeName_type))) |
1366 | return -EFAULT; | 1360 | return -EFAULT; |
1367 | return 0; | 1361 | return 0; |
@@ -1398,10 +1392,13 @@ static int cciss_setnodename(ctlr_info_t *h, void __user *argp) | |||
1398 | static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) | 1392 | static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) |
1399 | { | 1393 | { |
1400 | Heartbeat_type heartbeat; | 1394 | Heartbeat_type heartbeat; |
1395 | unsigned long flags; | ||
1401 | 1396 | ||
1402 | if (!argp) | 1397 | if (!argp) |
1403 | return -EINVAL; | 1398 | return -EINVAL; |
1399 | spin_lock_irqsave(&h->lock, flags); | ||
1404 | heartbeat = readl(&h->cfgtable->HeartBeat); | 1400 | heartbeat = readl(&h->cfgtable->HeartBeat); |
1401 | spin_unlock_irqrestore(&h->lock, flags); | ||
1405 | if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type))) | 1402 | if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type))) |
1406 | return -EFAULT; | 1403 | return -EFAULT; |
1407 | return 0; | 1404 | return 0; |
@@ -1410,10 +1407,13 @@ static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp) | |||
1410 | static int cciss_getbustypes(ctlr_info_t *h, void __user *argp) | 1407 | static int cciss_getbustypes(ctlr_info_t *h, void __user *argp) |
1411 | { | 1408 | { |
1412 | BusTypes_type BusTypes; | 1409 | BusTypes_type BusTypes; |
1410 | unsigned long flags; | ||
1413 | 1411 | ||
1414 | if (!argp) | 1412 | if (!argp) |
1415 | return -EINVAL; | 1413 | return -EINVAL; |
1414 | spin_lock_irqsave(&h->lock, flags); | ||
1416 | BusTypes = readl(&h->cfgtable->BusTypes); | 1415 | BusTypes = readl(&h->cfgtable->BusTypes); |
1416 | spin_unlock_irqrestore(&h->lock, flags); | ||
1417 | if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type))) | 1417 | if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type))) |
1418 | return -EFAULT; | 1418 | return -EFAULT; |
1419 | return 0; | 1419 | return 0; |
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 847107ef0cce..20dd52a2f92f 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -3002,7 +3002,8 @@ static int mtip_hw_debugfs_init(struct driver_data *dd) | |||
3002 | 3002 | ||
3003 | static void mtip_hw_debugfs_exit(struct driver_data *dd) | 3003 | static void mtip_hw_debugfs_exit(struct driver_data *dd) |
3004 | { | 3004 | { |
3005 | debugfs_remove_recursive(dd->dfs_node); | 3005 | if (dd->dfs_node) |
3006 | debugfs_remove_recursive(dd->dfs_node); | ||
3006 | } | 3007 | } |
3007 | 3008 | ||
3008 | 3009 | ||
@@ -3863,7 +3864,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) | |||
3863 | struct driver_data *dd = queue->queuedata; | 3864 | struct driver_data *dd = queue->queuedata; |
3864 | struct scatterlist *sg; | 3865 | struct scatterlist *sg; |
3865 | struct bio_vec *bvec; | 3866 | struct bio_vec *bvec; |
3866 | int nents = 0; | 3867 | int i, nents = 0; |
3867 | int tag = 0, unaligned = 0; | 3868 | int tag = 0, unaligned = 0; |
3868 | 3869 | ||
3869 | if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { | 3870 | if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { |
@@ -3921,11 +3922,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) | |||
3921 | } | 3922 | } |
3922 | 3923 | ||
3923 | /* Create the scatter list for this bio. */ | 3924 | /* Create the scatter list for this bio. */ |
3924 | bio_for_each_segment(bvec, bio, nents) { | 3925 | bio_for_each_segment(bvec, bio, i) { |
3925 | sg_set_page(&sg[nents], | 3926 | sg_set_page(&sg[nents], |
3926 | bvec->bv_page, | 3927 | bvec->bv_page, |
3927 | bvec->bv_len, | 3928 | bvec->bv_len, |
3928 | bvec->bv_offset); | 3929 | bvec->bv_offset); |
3930 | nents++; | ||
3929 | } | 3931 | } |
3930 | 3932 | ||
3931 | /* Issue the read/write. */ | 3933 | /* Issue the read/write. */ |
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 8efdfaa44a59..ce79a590b45b 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c | |||
@@ -629,7 +629,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, | |||
629 | struct nvme_command *cmnd; | 629 | struct nvme_command *cmnd; |
630 | struct nvme_iod *iod; | 630 | struct nvme_iod *iod; |
631 | enum dma_data_direction dma_dir; | 631 | enum dma_data_direction dma_dir; |
632 | int cmdid, length, result = -ENOMEM; | 632 | int cmdid, length, result; |
633 | u16 control; | 633 | u16 control; |
634 | u32 dsmgmt; | 634 | u32 dsmgmt; |
635 | int psegs = bio_phys_segments(ns->queue, bio); | 635 | int psegs = bio_phys_segments(ns->queue, bio); |
@@ -640,6 +640,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns, | |||
640 | return result; | 640 | return result; |
641 | } | 641 | } |
642 | 642 | ||
643 | result = -ENOMEM; | ||
643 | iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC); | 644 | iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC); |
644 | if (!iod) | 645 | if (!iod) |
645 | goto nomem; | 646 | goto nomem; |
@@ -977,6 +978,8 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout) | |||
977 | 978 | ||
978 | if (timeout && !time_after(now, info[cmdid].timeout)) | 979 | if (timeout && !time_after(now, info[cmdid].timeout)) |
979 | continue; | 980 | continue; |
981 | if (info[cmdid].ctx == CMD_CTX_CANCELLED) | ||
982 | continue; | ||
980 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); | 983 | dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid); |
981 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); | 984 | ctx = cancel_cmdid(nvmeq, cmdid, &fn); |
982 | fn(nvmeq->dev, ctx, &cqe); | 985 | fn(nvmeq->dev, ctx, &cqe); |
@@ -1206,7 +1209,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, | |||
1206 | 1209 | ||
1207 | if (addr & 3) | 1210 | if (addr & 3) |
1208 | return ERR_PTR(-EINVAL); | 1211 | return ERR_PTR(-EINVAL); |
1209 | if (!length) | 1212 | if (!length || length > INT_MAX - PAGE_SIZE) |
1210 | return ERR_PTR(-EINVAL); | 1213 | return ERR_PTR(-EINVAL); |
1211 | 1214 | ||
1212 | offset = offset_in_page(addr); | 1215 | offset = offset_in_page(addr); |
@@ -1227,7 +1230,8 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, | |||
1227 | sg_init_table(sg, count); | 1230 | sg_init_table(sg, count); |
1228 | for (i = 0; i < count; i++) { | 1231 | for (i = 0; i < count; i++) { |
1229 | sg_set_page(&sg[i], pages[i], | 1232 | sg_set_page(&sg[i], pages[i], |
1230 | min_t(int, length, PAGE_SIZE - offset), offset); | 1233 | min_t(unsigned, length, PAGE_SIZE - offset), |
1234 | offset); | ||
1231 | length -= (PAGE_SIZE - offset); | 1235 | length -= (PAGE_SIZE - offset); |
1232 | offset = 0; | 1236 | offset = 0; |
1233 | } | 1237 | } |
@@ -1435,7 +1439,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev, | |||
1435 | nvme_free_iod(dev, iod); | 1439 | nvme_free_iod(dev, iod); |
1436 | } | 1440 | } |
1437 | 1441 | ||
1438 | if (!status && copy_to_user(&ucmd->result, &cmd.result, | 1442 | if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result, |
1439 | sizeof(cmd.result))) | 1443 | sizeof(cmd.result))) |
1440 | status = -EFAULT; | 1444 | status = -EFAULT; |
1441 | 1445 | ||
@@ -1633,7 +1637,8 @@ static int set_queue_count(struct nvme_dev *dev, int count) | |||
1633 | 1637 | ||
1634 | static int nvme_setup_io_queues(struct nvme_dev *dev) | 1638 | static int nvme_setup_io_queues(struct nvme_dev *dev) |
1635 | { | 1639 | { |
1636 | int result, cpu, i, nr_io_queues, db_bar_size, q_depth; | 1640 | struct pci_dev *pdev = dev->pci_dev; |
1641 | int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count; | ||
1637 | 1642 | ||
1638 | nr_io_queues = num_online_cpus(); | 1643 | nr_io_queues = num_online_cpus(); |
1639 | result = set_queue_count(dev, nr_io_queues); | 1644 | result = set_queue_count(dev, nr_io_queues); |
@@ -1642,14 +1647,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
1642 | if (result < nr_io_queues) | 1647 | if (result < nr_io_queues) |
1643 | nr_io_queues = result; | 1648 | nr_io_queues = result; |
1644 | 1649 | ||
1650 | q_count = nr_io_queues; | ||
1645 | /* Deregister the admin queue's interrupt */ | 1651 | /* Deregister the admin queue's interrupt */ |
1646 | free_irq(dev->entry[0].vector, dev->queues[0]); | 1652 | free_irq(dev->entry[0].vector, dev->queues[0]); |
1647 | 1653 | ||
1648 | db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); | 1654 | db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3)); |
1649 | if (db_bar_size > 8192) { | 1655 | if (db_bar_size > 8192) { |
1650 | iounmap(dev->bar); | 1656 | iounmap(dev->bar); |
1651 | dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0), | 1657 | dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size); |
1652 | db_bar_size); | ||
1653 | dev->dbs = ((void __iomem *)dev->bar) + 4096; | 1658 | dev->dbs = ((void __iomem *)dev->bar) + 4096; |
1654 | dev->queues[0]->q_db = dev->dbs; | 1659 | dev->queues[0]->q_db = dev->dbs; |
1655 | } | 1660 | } |
@@ -1657,19 +1662,36 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) | |||
1657 | for (i = 0; i < nr_io_queues; i++) | 1662 | for (i = 0; i < nr_io_queues; i++) |
1658 | dev->entry[i].entry = i; | 1663 | dev->entry[i].entry = i; |
1659 | for (;;) { | 1664 | for (;;) { |
1660 | result = pci_enable_msix(dev->pci_dev, dev->entry, | 1665 | result = pci_enable_msix(pdev, dev->entry, nr_io_queues); |
1661 | nr_io_queues); | ||
1662 | if (result == 0) { | 1666 | if (result == 0) { |
1663 | break; | 1667 | break; |
1664 | } else if (result > 0) { | 1668 | } else if (result > 0) { |
1665 | nr_io_queues = result; | 1669 | nr_io_queues = result; |
1666 | continue; | 1670 | continue; |
1667 | } else { | 1671 | } else { |
1668 | nr_io_queues = 1; | 1672 | nr_io_queues = 0; |
1669 | break; | 1673 | break; |
1670 | } | 1674 | } |
1671 | } | 1675 | } |
1672 | 1676 | ||
1677 | if (nr_io_queues == 0) { | ||
1678 | nr_io_queues = q_count; | ||
1679 | for (;;) { | ||
1680 | result = pci_enable_msi_block(pdev, nr_io_queues); | ||
1681 | if (result == 0) { | ||
1682 | for (i = 0; i < nr_io_queues; i++) | ||
1683 | dev->entry[i].vector = i + pdev->irq; | ||
1684 | break; | ||
1685 | } else if (result > 0) { | ||
1686 | nr_io_queues = result; | ||
1687 | continue; | ||
1688 | } else { | ||
1689 | nr_io_queues = 1; | ||
1690 | break; | ||
1691 | } | ||
1692 | } | ||
1693 | } | ||
1694 | |||
1673 | result = queue_request_irq(dev, dev->queues[0], "nvme admin"); | 1695 | result = queue_request_irq(dev, dev->queues[0], "nvme admin"); |
1674 | /* XXX: handle failure here */ | 1696 | /* XXX: handle failure here */ |
1675 | 1697 | ||
@@ -1850,7 +1872,10 @@ static void nvme_free_dev(struct kref *kref) | |||
1850 | { | 1872 | { |
1851 | struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); | 1873 | struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); |
1852 | nvme_dev_remove(dev); | 1874 | nvme_dev_remove(dev); |
1853 | pci_disable_msix(dev->pci_dev); | 1875 | if (dev->pci_dev->msi_enabled) |
1876 | pci_disable_msi(dev->pci_dev); | ||
1877 | else if (dev->pci_dev->msix_enabled) | ||
1878 | pci_disable_msix(dev->pci_dev); | ||
1854 | iounmap(dev->bar); | 1879 | iounmap(dev->bar); |
1855 | nvme_release_instance(dev); | 1880 | nvme_release_instance(dev); |
1856 | nvme_release_prp_pools(dev); | 1881 | nvme_release_prp_pools(dev); |
@@ -1923,8 +1948,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
1923 | INIT_LIST_HEAD(&dev->namespaces); | 1948 | INIT_LIST_HEAD(&dev->namespaces); |
1924 | dev->pci_dev = pdev; | 1949 | dev->pci_dev = pdev; |
1925 | pci_set_drvdata(pdev, dev); | 1950 | pci_set_drvdata(pdev, dev); |
1926 | dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); | 1951 | |
1927 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); | 1952 | if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) |
1953 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); | ||
1954 | else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32))) | ||
1955 | dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); | ||
1956 | else | ||
1957 | goto disable; | ||
1958 | |||
1928 | result = nvme_set_instance(dev); | 1959 | result = nvme_set_instance(dev); |
1929 | if (result) | 1960 | if (result) |
1930 | goto disable; | 1961 | goto disable; |
@@ -1977,7 +2008,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
1977 | unmap: | 2008 | unmap: |
1978 | iounmap(dev->bar); | 2009 | iounmap(dev->bar); |
1979 | disable_msix: | 2010 | disable_msix: |
1980 | pci_disable_msix(pdev); | 2011 | if (dev->pci_dev->msi_enabled) |
2012 | pci_disable_msi(dev->pci_dev); | ||
2013 | else if (dev->pci_dev->msix_enabled) | ||
2014 | pci_disable_msix(dev->pci_dev); | ||
1981 | nvme_release_instance(dev); | 2015 | nvme_release_instance(dev); |
1982 | nvme_release_prp_pools(dev); | 2016 | nvme_release_prp_pools(dev); |
1983 | disable: | 2017 | disable: |
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c index fed54b039893..102de2f52b5c 100644 --- a/drivers/block/nvme-scsi.c +++ b/drivers/block/nvme-scsi.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <linux/sched.h> | 44 | #include <linux/sched.h> |
45 | #include <linux/slab.h> | 45 | #include <linux/slab.h> |
46 | #include <linux/types.h> | 46 | #include <linux/types.h> |
47 | #include <linux/version.h> | ||
48 | #include <scsi/sg.h> | 47 | #include <scsi/sg.h> |
49 | #include <scsi/scsi.h> | 48 | #include <scsi/scsi.h> |
50 | 49 | ||
@@ -1654,7 +1653,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list, | |||
1654 | } | 1653 | } |
1655 | } | 1654 | } |
1656 | 1655 | ||
1657 | static u16 nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, | 1656 | static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr, |
1658 | u8 *mode_page, u8 page_code) | 1657 | u8 *mode_page, u8 page_code) |
1659 | { | 1658 | { |
1660 | int res = SNTI_TRANSLATION_SUCCESS; | 1659 | int res = SNTI_TRANSLATION_SUCCESS; |
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 3c08983e600a..f5d0ea11d9fd 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
@@ -83,7 +83,8 @@ | |||
83 | 83 | ||
84 | #define MAX_SPEED 0xffff | 84 | #define MAX_SPEED 0xffff |
85 | 85 | ||
86 | #define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1)) | 86 | #define ZONE(sector, pd) (((sector) + (pd)->offset) & \ |
87 | ~(sector_t)((pd)->settings.size - 1)) | ||
87 | 88 | ||
88 | static DEFINE_MUTEX(pktcdvd_mutex); | 89 | static DEFINE_MUTEX(pktcdvd_mutex); |
89 | static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; | 90 | static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; |
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ca63104136e0..49394e3f31bc 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c | |||
@@ -55,6 +55,39 @@ | |||
55 | #define SECTOR_SHIFT 9 | 55 | #define SECTOR_SHIFT 9 |
56 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) | 56 | #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) |
57 | 57 | ||
58 | /* | ||
59 | * Increment the given counter and return its updated value. | ||
60 | * If the counter is already 0 it will not be incremented. | ||
61 | * If the counter is already at its maximum value returns | ||
62 | * -EINVAL without updating it. | ||
63 | */ | ||
64 | static int atomic_inc_return_safe(atomic_t *v) | ||
65 | { | ||
66 | unsigned int counter; | ||
67 | |||
68 | counter = (unsigned int)__atomic_add_unless(v, 1, 0); | ||
69 | if (counter <= (unsigned int)INT_MAX) | ||
70 | return (int)counter; | ||
71 | |||
72 | atomic_dec(v); | ||
73 | |||
74 | return -EINVAL; | ||
75 | } | ||
76 | |||
77 | /* Decrement the counter. Return the resulting value, or -EINVAL */ | ||
78 | static int atomic_dec_return_safe(atomic_t *v) | ||
79 | { | ||
80 | int counter; | ||
81 | |||
82 | counter = atomic_dec_return(v); | ||
83 | if (counter >= 0) | ||
84 | return counter; | ||
85 | |||
86 | atomic_inc(v); | ||
87 | |||
88 | return -EINVAL; | ||
89 | } | ||
90 | |||
58 | #define RBD_DRV_NAME "rbd" | 91 | #define RBD_DRV_NAME "rbd" |
59 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" | 92 | #define RBD_DRV_NAME_LONG "rbd (rados block device)" |
60 | 93 | ||
@@ -100,21 +133,20 @@ | |||
100 | * block device image metadata (in-memory version) | 133 | * block device image metadata (in-memory version) |
101 | */ | 134 | */ |
102 | struct rbd_image_header { | 135 | struct rbd_image_header { |
103 | /* These four fields never change for a given rbd image */ | 136 | /* These six fields never change for a given rbd image */ |
104 | char *object_prefix; | 137 | char *object_prefix; |
105 | u64 features; | ||
106 | __u8 obj_order; | 138 | __u8 obj_order; |
107 | __u8 crypt_type; | 139 | __u8 crypt_type; |
108 | __u8 comp_type; | 140 | __u8 comp_type; |
141 | u64 stripe_unit; | ||
142 | u64 stripe_count; | ||
143 | u64 features; /* Might be changeable someday? */ | ||
109 | 144 | ||
110 | /* The remaining fields need to be updated occasionally */ | 145 | /* The remaining fields need to be updated occasionally */ |
111 | u64 image_size; | 146 | u64 image_size; |
112 | struct ceph_snap_context *snapc; | 147 | struct ceph_snap_context *snapc; |
113 | char *snap_names; | 148 | char *snap_names; /* format 1 only */ |
114 | u64 *snap_sizes; | 149 | u64 *snap_sizes; /* format 1 only */ |
115 | |||
116 | u64 stripe_unit; | ||
117 | u64 stripe_count; | ||
118 | }; | 150 | }; |
119 | 151 | ||
120 | /* | 152 | /* |
@@ -225,6 +257,7 @@ struct rbd_obj_request { | |||
225 | }; | 257 | }; |
226 | }; | 258 | }; |
227 | struct page **copyup_pages; | 259 | struct page **copyup_pages; |
260 | u32 copyup_page_count; | ||
228 | 261 | ||
229 | struct ceph_osd_request *osd_req; | 262 | struct ceph_osd_request *osd_req; |
230 | 263 | ||
@@ -257,6 +290,7 @@ struct rbd_img_request { | |||
257 | struct rbd_obj_request *obj_request; /* obj req initiator */ | 290 | struct rbd_obj_request *obj_request; /* obj req initiator */ |
258 | }; | 291 | }; |
259 | struct page **copyup_pages; | 292 | struct page **copyup_pages; |
293 | u32 copyup_page_count; | ||
260 | spinlock_t completion_lock;/* protects next_completion */ | 294 | spinlock_t completion_lock;/* protects next_completion */ |
261 | u32 next_completion; | 295 | u32 next_completion; |
262 | rbd_img_callback_t callback; | 296 | rbd_img_callback_t callback; |
@@ -311,6 +345,7 @@ struct rbd_device { | |||
311 | 345 | ||
312 | struct rbd_spec *parent_spec; | 346 | struct rbd_spec *parent_spec; |
313 | u64 parent_overlap; | 347 | u64 parent_overlap; |
348 | atomic_t parent_ref; | ||
314 | struct rbd_device *parent; | 349 | struct rbd_device *parent; |
315 | 350 | ||
316 | /* protects updating the header */ | 351 | /* protects updating the header */ |
@@ -359,7 +394,8 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf, | |||
359 | size_t count); | 394 | size_t count); |
360 | static ssize_t rbd_remove(struct bus_type *bus, const char *buf, | 395 | static ssize_t rbd_remove(struct bus_type *bus, const char *buf, |
361 | size_t count); | 396 | size_t count); |
362 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev); | 397 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); |
398 | static void rbd_spec_put(struct rbd_spec *spec); | ||
363 | 399 | ||
364 | static struct bus_attribute rbd_bus_attrs[] = { | 400 | static struct bus_attribute rbd_bus_attrs[] = { |
365 | __ATTR(add, S_IWUSR, NULL, rbd_add), | 401 | __ATTR(add, S_IWUSR, NULL, rbd_add), |
@@ -426,7 +462,8 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request); | |||
426 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); | 462 | static void rbd_dev_remove_parent(struct rbd_device *rbd_dev); |
427 | 463 | ||
428 | static int rbd_dev_refresh(struct rbd_device *rbd_dev); | 464 | static int rbd_dev_refresh(struct rbd_device *rbd_dev); |
429 | static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev); | 465 | static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev); |
466 | static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev); | ||
430 | static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, | 467 | static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, |
431 | u64 snap_id); | 468 | u64 snap_id); |
432 | static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, | 469 | static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, |
@@ -482,8 +519,8 @@ static const struct block_device_operations rbd_bd_ops = { | |||
482 | }; | 519 | }; |
483 | 520 | ||
484 | /* | 521 | /* |
485 | * Initialize an rbd client instance. | 522 | * Initialize an rbd client instance. Success or not, this function |
486 | * We own *ceph_opts. | 523 | * consumes ceph_opts. |
487 | */ | 524 | */ |
488 | static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) | 525 | static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) |
489 | { | 526 | { |
@@ -638,7 +675,8 @@ static int parse_rbd_opts_token(char *c, void *private) | |||
638 | 675 | ||
639 | /* | 676 | /* |
640 | * Get a ceph client with specific addr and configuration, if one does | 677 | * Get a ceph client with specific addr and configuration, if one does |
641 | * not exist create it. | 678 | * not exist create it. Either way, ceph_opts is consumed by this |
679 | * function. | ||
642 | */ | 680 | */ |
643 | static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) | 681 | static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) |
644 | { | 682 | { |
@@ -726,88 +764,123 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk) | |||
726 | } | 764 | } |
727 | 765 | ||
728 | /* | 766 | /* |
729 | * Create a new header structure, translate header format from the on-disk | 767 | * Fill an rbd image header with information from the given format 1 |
730 | * header. | 768 | * on-disk header. |
731 | */ | 769 | */ |
732 | static int rbd_header_from_disk(struct rbd_image_header *header, | 770 | static int rbd_header_from_disk(struct rbd_device *rbd_dev, |
733 | struct rbd_image_header_ondisk *ondisk) | 771 | struct rbd_image_header_ondisk *ondisk) |
734 | { | 772 | { |
773 | struct rbd_image_header *header = &rbd_dev->header; | ||
774 | bool first_time = header->object_prefix == NULL; | ||
775 | struct ceph_snap_context *snapc; | ||
776 | char *object_prefix = NULL; | ||
777 | char *snap_names = NULL; | ||
778 | u64 *snap_sizes = NULL; | ||
735 | u32 snap_count; | 779 | u32 snap_count; |
736 | size_t len; | ||
737 | size_t size; | 780 | size_t size; |
781 | int ret = -ENOMEM; | ||
738 | u32 i; | 782 | u32 i; |
739 | 783 | ||
740 | memset(header, 0, sizeof (*header)); | 784 | /* Allocate this now to avoid having to handle failure below */ |
741 | 785 | ||
742 | snap_count = le32_to_cpu(ondisk->snap_count); | 786 | if (first_time) { |
787 | size_t len; | ||
743 | 788 | ||
744 | len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix)); | 789 | len = strnlen(ondisk->object_prefix, |
745 | header->object_prefix = kmalloc(len + 1, GFP_KERNEL); | 790 | sizeof (ondisk->object_prefix)); |
746 | if (!header->object_prefix) | 791 | object_prefix = kmalloc(len + 1, GFP_KERNEL); |
747 | return -ENOMEM; | 792 | if (!object_prefix) |
748 | memcpy(header->object_prefix, ondisk->object_prefix, len); | 793 | return -ENOMEM; |
749 | header->object_prefix[len] = '\0'; | 794 | memcpy(object_prefix, ondisk->object_prefix, len); |
795 | object_prefix[len] = '\0'; | ||
796 | } | ||
797 | |||
798 | /* Allocate the snapshot context and fill it in */ | ||
750 | 799 | ||
800 | snap_count = le32_to_cpu(ondisk->snap_count); | ||
801 | snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); | ||
802 | if (!snapc) | ||
803 | goto out_err; | ||
804 | snapc->seq = le64_to_cpu(ondisk->snap_seq); | ||
751 | if (snap_count) { | 805 | if (snap_count) { |
806 | struct rbd_image_snap_ondisk *snaps; | ||
752 | u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); | 807 | u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len); |
753 | 808 | ||
754 | /* Save a copy of the snapshot names */ | 809 | /* We'll keep a copy of the snapshot names... */ |
755 | 810 | ||
756 | if (snap_names_len > (u64) SIZE_MAX) | 811 | if (snap_names_len > (u64)SIZE_MAX) |
757 | return -EIO; | 812 | goto out_2big; |
758 | header->snap_names = kmalloc(snap_names_len, GFP_KERNEL); | 813 | snap_names = kmalloc(snap_names_len, GFP_KERNEL); |
759 | if (!header->snap_names) | 814 | if (!snap_names) |
815 | goto out_err; | ||
816 | |||
817 | /* ...as well as the array of their sizes. */ | ||
818 | |||
819 | size = snap_count * sizeof (*header->snap_sizes); | ||
820 | snap_sizes = kmalloc(size, GFP_KERNEL); | ||
821 | if (!snap_sizes) | ||
760 | goto out_err; | 822 | goto out_err; |
823 | |||
761 | /* | 824 | /* |
762 | * Note that rbd_dev_v1_header_read() guarantees | 825 | * Copy the names, and fill in each snapshot's id |
763 | * the ondisk buffer we're working with has | 826 | * and size. |
827 | * | ||
828 | * Note that rbd_dev_v1_header_info() guarantees the | ||
829 | * ondisk buffer we're working with has | ||
764 | * snap_names_len bytes beyond the end of the | 830 | * snap_names_len bytes beyond the end of the |
765 | * snapshot id array, this memcpy() is safe. | 831 | * snapshot id array, this memcpy() is safe. |
766 | */ | 832 | */ |
767 | memcpy(header->snap_names, &ondisk->snaps[snap_count], | 833 | memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len); |
768 | snap_names_len); | 834 | snaps = ondisk->snaps; |
835 | for (i = 0; i < snap_count; i++) { | ||
836 | snapc->snaps[i] = le64_to_cpu(snaps[i].id); | ||
837 | snap_sizes[i] = le64_to_cpu(snaps[i].image_size); | ||
838 | } | ||
839 | } | ||
769 | 840 | ||
770 | /* Record each snapshot's size */ | 841 | /* We won't fail any more, fill in the header */ |
771 | 842 | ||
772 | size = snap_count * sizeof (*header->snap_sizes); | 843 | down_write(&rbd_dev->header_rwsem); |
773 | header->snap_sizes = kmalloc(size, GFP_KERNEL); | 844 | if (first_time) { |
774 | if (!header->snap_sizes) | 845 | header->object_prefix = object_prefix; |
775 | goto out_err; | 846 | header->obj_order = ondisk->options.order; |
776 | for (i = 0; i < snap_count; i++) | 847 | header->crypt_type = ondisk->options.crypt_type; |
777 | header->snap_sizes[i] = | 848 | header->comp_type = ondisk->options.comp_type; |
778 | le64_to_cpu(ondisk->snaps[i].image_size); | 849 | /* The rest aren't used for format 1 images */ |
850 | header->stripe_unit = 0; | ||
851 | header->stripe_count = 0; | ||
852 | header->features = 0; | ||
779 | } else { | 853 | } else { |
780 | header->snap_names = NULL; | 854 | ceph_put_snap_context(header->snapc); |
781 | header->snap_sizes = NULL; | 855 | kfree(header->snap_names); |
856 | kfree(header->snap_sizes); | ||
782 | } | 857 | } |
783 | 858 | ||
784 | header->features = 0; /* No features support in v1 images */ | 859 | /* The remaining fields always get updated (when we refresh) */ |
785 | header->obj_order = ondisk->options.order; | ||
786 | header->crypt_type = ondisk->options.crypt_type; | ||
787 | header->comp_type = ondisk->options.comp_type; | ||
788 | |||
789 | /* Allocate and fill in the snapshot context */ | ||
790 | 860 | ||
791 | header->image_size = le64_to_cpu(ondisk->image_size); | 861 | header->image_size = le64_to_cpu(ondisk->image_size); |
862 | header->snapc = snapc; | ||
863 | header->snap_names = snap_names; | ||
864 | header->snap_sizes = snap_sizes; | ||
792 | 865 | ||
793 | header->snapc = ceph_create_snap_context(snap_count, GFP_KERNEL); | 866 | /* Make sure mapping size is consistent with header info */ |
794 | if (!header->snapc) | ||
795 | goto out_err; | ||
796 | header->snapc->seq = le64_to_cpu(ondisk->snap_seq); | ||
797 | for (i = 0; i < snap_count; i++) | ||
798 | header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id); | ||
799 | 867 | ||
800 | return 0; | 868 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time) |
869 | if (rbd_dev->mapping.size != header->image_size) | ||
870 | rbd_dev->mapping.size = header->image_size; | ||
801 | 871 | ||
872 | up_write(&rbd_dev->header_rwsem); | ||
873 | |||
874 | return 0; | ||
875 | out_2big: | ||
876 | ret = -EIO; | ||
802 | out_err: | 877 | out_err: |
803 | kfree(header->snap_sizes); | 878 | kfree(snap_sizes); |
804 | header->snap_sizes = NULL; | 879 | kfree(snap_names); |
805 | kfree(header->snap_names); | 880 | ceph_put_snap_context(snapc); |
806 | header->snap_names = NULL; | 881 | kfree(object_prefix); |
807 | kfree(header->object_prefix); | ||
808 | header->object_prefix = NULL; | ||
809 | 882 | ||
810 | return -ENOMEM; | 883 | return ret; |
811 | } | 884 | } |
812 | 885 | ||
813 | static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) | 886 | static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which) |
@@ -934,20 +1007,11 @@ static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id, | |||
934 | 1007 | ||
935 | static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) | 1008 | static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) |
936 | { | 1009 | { |
937 | const char *snap_name = rbd_dev->spec->snap_name; | 1010 | u64 snap_id = rbd_dev->spec->snap_id; |
938 | u64 snap_id; | ||
939 | u64 size = 0; | 1011 | u64 size = 0; |
940 | u64 features = 0; | 1012 | u64 features = 0; |
941 | int ret; | 1013 | int ret; |
942 | 1014 | ||
943 | if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) { | ||
944 | snap_id = rbd_snap_id_by_name(rbd_dev, snap_name); | ||
945 | if (snap_id == CEPH_NOSNAP) | ||
946 | return -ENOENT; | ||
947 | } else { | ||
948 | snap_id = CEPH_NOSNAP; | ||
949 | } | ||
950 | |||
951 | ret = rbd_snap_size(rbd_dev, snap_id, &size); | 1015 | ret = rbd_snap_size(rbd_dev, snap_id, &size); |
952 | if (ret) | 1016 | if (ret) |
953 | return ret; | 1017 | return ret; |
@@ -958,11 +1022,6 @@ static int rbd_dev_mapping_set(struct rbd_device *rbd_dev) | |||
958 | rbd_dev->mapping.size = size; | 1022 | rbd_dev->mapping.size = size; |
959 | rbd_dev->mapping.features = features; | 1023 | rbd_dev->mapping.features = features; |
960 | 1024 | ||
961 | /* If we are mapping a snapshot it must be marked read-only */ | ||
962 | |||
963 | if (snap_id != CEPH_NOSNAP) | ||
964 | rbd_dev->mapping.read_only = true; | ||
965 | |||
966 | return 0; | 1025 | return 0; |
967 | } | 1026 | } |
968 | 1027 | ||
@@ -970,14 +1029,6 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev) | |||
970 | { | 1029 | { |
971 | rbd_dev->mapping.size = 0; | 1030 | rbd_dev->mapping.size = 0; |
972 | rbd_dev->mapping.features = 0; | 1031 | rbd_dev->mapping.features = 0; |
973 | rbd_dev->mapping.read_only = true; | ||
974 | } | ||
975 | |||
976 | static void rbd_dev_clear_mapping(struct rbd_device *rbd_dev) | ||
977 | { | ||
978 | rbd_dev->mapping.size = 0; | ||
979 | rbd_dev->mapping.features = 0; | ||
980 | rbd_dev->mapping.read_only = true; | ||
981 | } | 1032 | } |
982 | 1033 | ||
983 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) | 1034 | static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) |
@@ -985,12 +1036,16 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) | |||
985 | char *name; | 1036 | char *name; |
986 | u64 segment; | 1037 | u64 segment; |
987 | int ret; | 1038 | int ret; |
1039 | char *name_format; | ||
988 | 1040 | ||
989 | name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); | 1041 | name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO); |
990 | if (!name) | 1042 | if (!name) |
991 | return NULL; | 1043 | return NULL; |
992 | segment = offset >> rbd_dev->header.obj_order; | 1044 | segment = offset >> rbd_dev->header.obj_order; |
993 | ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx", | 1045 | name_format = "%s.%012llx"; |
1046 | if (rbd_dev->image_format == 2) | ||
1047 | name_format = "%s.%016llx"; | ||
1048 | ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, name_format, | ||
994 | rbd_dev->header.object_prefix, segment); | 1049 | rbd_dev->header.object_prefix, segment); |
995 | if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { | 1050 | if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) { |
996 | pr_err("error formatting segment name for #%llu (%d)\n", | 1051 | pr_err("error formatting segment name for #%llu (%d)\n", |
@@ -1342,20 +1397,18 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request) | |||
1342 | kref_put(&obj_request->kref, rbd_obj_request_destroy); | 1397 | kref_put(&obj_request->kref, rbd_obj_request_destroy); |
1343 | } | 1398 | } |
1344 | 1399 | ||
1345 | static void rbd_img_request_get(struct rbd_img_request *img_request) | 1400 | static bool img_request_child_test(struct rbd_img_request *img_request); |
1346 | { | 1401 | static void rbd_parent_request_destroy(struct kref *kref); |
1347 | dout("%s: img %p (was %d)\n", __func__, img_request, | ||
1348 | atomic_read(&img_request->kref.refcount)); | ||
1349 | kref_get(&img_request->kref); | ||
1350 | } | ||
1351 | |||
1352 | static void rbd_img_request_destroy(struct kref *kref); | 1402 | static void rbd_img_request_destroy(struct kref *kref); |
1353 | static void rbd_img_request_put(struct rbd_img_request *img_request) | 1403 | static void rbd_img_request_put(struct rbd_img_request *img_request) |
1354 | { | 1404 | { |
1355 | rbd_assert(img_request != NULL); | 1405 | rbd_assert(img_request != NULL); |
1356 | dout("%s: img %p (was %d)\n", __func__, img_request, | 1406 | dout("%s: img %p (was %d)\n", __func__, img_request, |
1357 | atomic_read(&img_request->kref.refcount)); | 1407 | atomic_read(&img_request->kref.refcount)); |
1358 | kref_put(&img_request->kref, rbd_img_request_destroy); | 1408 | if (img_request_child_test(img_request)) |
1409 | kref_put(&img_request->kref, rbd_parent_request_destroy); | ||
1410 | else | ||
1411 | kref_put(&img_request->kref, rbd_img_request_destroy); | ||
1359 | } | 1412 | } |
1360 | 1413 | ||
1361 | static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, | 1414 | static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, |
@@ -1472,6 +1525,12 @@ static void img_request_child_set(struct rbd_img_request *img_request) | |||
1472 | smp_mb(); | 1525 | smp_mb(); |
1473 | } | 1526 | } |
1474 | 1527 | ||
1528 | static void img_request_child_clear(struct rbd_img_request *img_request) | ||
1529 | { | ||
1530 | clear_bit(IMG_REQ_CHILD, &img_request->flags); | ||
1531 | smp_mb(); | ||
1532 | } | ||
1533 | |||
1475 | static bool img_request_child_test(struct rbd_img_request *img_request) | 1534 | static bool img_request_child_test(struct rbd_img_request *img_request) |
1476 | { | 1535 | { |
1477 | smp_mb(); | 1536 | smp_mb(); |
@@ -1484,6 +1543,12 @@ static void img_request_layered_set(struct rbd_img_request *img_request) | |||
1484 | smp_mb(); | 1543 | smp_mb(); |
1485 | } | 1544 | } |
1486 | 1545 | ||
1546 | static void img_request_layered_clear(struct rbd_img_request *img_request) | ||
1547 | { | ||
1548 | clear_bit(IMG_REQ_LAYERED, &img_request->flags); | ||
1549 | smp_mb(); | ||
1550 | } | ||
1551 | |||
1487 | static bool img_request_layered_test(struct rbd_img_request *img_request) | 1552 | static bool img_request_layered_test(struct rbd_img_request *img_request) |
1488 | { | 1553 | { |
1489 | smp_mb(); | 1554 | smp_mb(); |
@@ -1827,6 +1892,74 @@ static void rbd_obj_request_destroy(struct kref *kref) | |||
1827 | kmem_cache_free(rbd_obj_request_cache, obj_request); | 1892 | kmem_cache_free(rbd_obj_request_cache, obj_request); |
1828 | } | 1893 | } |
1829 | 1894 | ||
1895 | /* It's OK to call this for a device with no parent */ | ||
1896 | |||
1897 | static void rbd_spec_put(struct rbd_spec *spec); | ||
1898 | static void rbd_dev_unparent(struct rbd_device *rbd_dev) | ||
1899 | { | ||
1900 | rbd_dev_remove_parent(rbd_dev); | ||
1901 | rbd_spec_put(rbd_dev->parent_spec); | ||
1902 | rbd_dev->parent_spec = NULL; | ||
1903 | rbd_dev->parent_overlap = 0; | ||
1904 | } | ||
1905 | |||
1906 | /* | ||
1907 | * Parent image reference counting is used to determine when an | ||
1908 | * image's parent fields can be safely torn down--after there are no | ||
1909 | * more in-flight requests to the parent image. When the last | ||
1910 | * reference is dropped, cleaning them up is safe. | ||
1911 | */ | ||
1912 | static void rbd_dev_parent_put(struct rbd_device *rbd_dev) | ||
1913 | { | ||
1914 | int counter; | ||
1915 | |||
1916 | if (!rbd_dev->parent_spec) | ||
1917 | return; | ||
1918 | |||
1919 | counter = atomic_dec_return_safe(&rbd_dev->parent_ref); | ||
1920 | if (counter > 0) | ||
1921 | return; | ||
1922 | |||
1923 | /* Last reference; clean up parent data structures */ | ||
1924 | |||
1925 | if (!counter) | ||
1926 | rbd_dev_unparent(rbd_dev); | ||
1927 | else | ||
1928 | rbd_warn(rbd_dev, "parent reference underflow\n"); | ||
1929 | } | ||
1930 | |||
1931 | /* | ||
1932 | * If an image has a non-zero parent overlap, get a reference to its | ||
1933 | * parent. | ||
1934 | * | ||
1935 | * We must get the reference before checking for the overlap to | ||
1936 | * coordinate properly with zeroing the parent overlap in | ||
1937 | * rbd_dev_v2_parent_info() when an image gets flattened. We | ||
1938 | * drop it again if there is no overlap. | ||
1939 | * | ||
1940 | * Returns true if the rbd device has a parent with a non-zero | ||
1941 | * overlap and a reference for it was successfully taken, or | ||
1942 | * false otherwise. | ||
1943 | */ | ||
1944 | static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) | ||
1945 | { | ||
1946 | int counter; | ||
1947 | |||
1948 | if (!rbd_dev->parent_spec) | ||
1949 | return false; | ||
1950 | |||
1951 | counter = atomic_inc_return_safe(&rbd_dev->parent_ref); | ||
1952 | if (counter > 0 && rbd_dev->parent_overlap) | ||
1953 | return true; | ||
1954 | |||
1955 | /* Image was flattened, but parent is not yet torn down */ | ||
1956 | |||
1957 | if (counter < 0) | ||
1958 | rbd_warn(rbd_dev, "parent reference overflow\n"); | ||
1959 | |||
1960 | return false; | ||
1961 | } | ||
1962 | |||
1830 | /* | 1963 | /* |
1831 | * Caller is responsible for filling in the list of object requests | 1964 | * Caller is responsible for filling in the list of object requests |
1832 | * that comprises the image request, and the Linux request pointer | 1965 | * that comprises the image request, and the Linux request pointer |
@@ -1835,8 +1968,7 @@ static void rbd_obj_request_destroy(struct kref *kref) | |||
1835 | static struct rbd_img_request *rbd_img_request_create( | 1968 | static struct rbd_img_request *rbd_img_request_create( |
1836 | struct rbd_device *rbd_dev, | 1969 | struct rbd_device *rbd_dev, |
1837 | u64 offset, u64 length, | 1970 | u64 offset, u64 length, |
1838 | bool write_request, | 1971 | bool write_request) |
1839 | bool child_request) | ||
1840 | { | 1972 | { |
1841 | struct rbd_img_request *img_request; | 1973 | struct rbd_img_request *img_request; |
1842 | 1974 | ||
@@ -1861,9 +1993,7 @@ static struct rbd_img_request *rbd_img_request_create( | |||
1861 | } else { | 1993 | } else { |
1862 | img_request->snap_id = rbd_dev->spec->snap_id; | 1994 | img_request->snap_id = rbd_dev->spec->snap_id; |
1863 | } | 1995 | } |
1864 | if (child_request) | 1996 | if (rbd_dev_parent_get(rbd_dev)) |
1865 | img_request_child_set(img_request); | ||
1866 | if (rbd_dev->parent_spec) | ||
1867 | img_request_layered_set(img_request); | 1997 | img_request_layered_set(img_request); |
1868 | spin_lock_init(&img_request->completion_lock); | 1998 | spin_lock_init(&img_request->completion_lock); |
1869 | img_request->next_completion = 0; | 1999 | img_request->next_completion = 0; |
@@ -1873,9 +2003,6 @@ static struct rbd_img_request *rbd_img_request_create( | |||
1873 | INIT_LIST_HEAD(&img_request->obj_requests); | 2003 | INIT_LIST_HEAD(&img_request->obj_requests); |
1874 | kref_init(&img_request->kref); | 2004 | kref_init(&img_request->kref); |
1875 | 2005 | ||
1876 | rbd_img_request_get(img_request); /* Avoid a warning */ | ||
1877 | rbd_img_request_put(img_request); /* TEMPORARY */ | ||
1878 | |||
1879 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, | 2006 | dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, |
1880 | write_request ? "write" : "read", offset, length, | 2007 | write_request ? "write" : "read", offset, length, |
1881 | img_request); | 2008 | img_request); |
@@ -1897,15 +2024,54 @@ static void rbd_img_request_destroy(struct kref *kref) | |||
1897 | rbd_img_obj_request_del(img_request, obj_request); | 2024 | rbd_img_obj_request_del(img_request, obj_request); |
1898 | rbd_assert(img_request->obj_request_count == 0); | 2025 | rbd_assert(img_request->obj_request_count == 0); |
1899 | 2026 | ||
2027 | if (img_request_layered_test(img_request)) { | ||
2028 | img_request_layered_clear(img_request); | ||
2029 | rbd_dev_parent_put(img_request->rbd_dev); | ||
2030 | } | ||
2031 | |||
1900 | if (img_request_write_test(img_request)) | 2032 | if (img_request_write_test(img_request)) |
1901 | ceph_put_snap_context(img_request->snapc); | 2033 | ceph_put_snap_context(img_request->snapc); |
1902 | 2034 | ||
1903 | if (img_request_child_test(img_request)) | ||
1904 | rbd_obj_request_put(img_request->obj_request); | ||
1905 | |||
1906 | kmem_cache_free(rbd_img_request_cache, img_request); | 2035 | kmem_cache_free(rbd_img_request_cache, img_request); |
1907 | } | 2036 | } |
1908 | 2037 | ||
2038 | static struct rbd_img_request *rbd_parent_request_create( | ||
2039 | struct rbd_obj_request *obj_request, | ||
2040 | u64 img_offset, u64 length) | ||
2041 | { | ||
2042 | struct rbd_img_request *parent_request; | ||
2043 | struct rbd_device *rbd_dev; | ||
2044 | |||
2045 | rbd_assert(obj_request->img_request); | ||
2046 | rbd_dev = obj_request->img_request->rbd_dev; | ||
2047 | |||
2048 | parent_request = rbd_img_request_create(rbd_dev->parent, | ||
2049 | img_offset, length, false); | ||
2050 | if (!parent_request) | ||
2051 | return NULL; | ||
2052 | |||
2053 | img_request_child_set(parent_request); | ||
2054 | rbd_obj_request_get(obj_request); | ||
2055 | parent_request->obj_request = obj_request; | ||
2056 | |||
2057 | return parent_request; | ||
2058 | } | ||
2059 | |||
2060 | static void rbd_parent_request_destroy(struct kref *kref) | ||
2061 | { | ||
2062 | struct rbd_img_request *parent_request; | ||
2063 | struct rbd_obj_request *orig_request; | ||
2064 | |||
2065 | parent_request = container_of(kref, struct rbd_img_request, kref); | ||
2066 | orig_request = parent_request->obj_request; | ||
2067 | |||
2068 | parent_request->obj_request = NULL; | ||
2069 | rbd_obj_request_put(orig_request); | ||
2070 | img_request_child_clear(parent_request); | ||
2071 | |||
2072 | rbd_img_request_destroy(kref); | ||
2073 | } | ||
2074 | |||
1909 | static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) | 2075 | static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) |
1910 | { | 2076 | { |
1911 | struct rbd_img_request *img_request; | 2077 | struct rbd_img_request *img_request; |
@@ -2114,7 +2280,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
2114 | { | 2280 | { |
2115 | struct rbd_img_request *img_request; | 2281 | struct rbd_img_request *img_request; |
2116 | struct rbd_device *rbd_dev; | 2282 | struct rbd_device *rbd_dev; |
2117 | u64 length; | 2283 | struct page **pages; |
2118 | u32 page_count; | 2284 | u32 page_count; |
2119 | 2285 | ||
2120 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2286 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); |
@@ -2124,12 +2290,14 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request) | |||
2124 | 2290 | ||
2125 | rbd_dev = img_request->rbd_dev; | 2291 | rbd_dev = img_request->rbd_dev; |
2126 | rbd_assert(rbd_dev); | 2292 | rbd_assert(rbd_dev); |
2127 | length = (u64)1 << rbd_dev->header.obj_order; | ||
2128 | page_count = (u32)calc_pages_for(0, length); | ||
2129 | 2293 | ||
2130 | rbd_assert(obj_request->copyup_pages); | 2294 | pages = obj_request->copyup_pages; |
2131 | ceph_release_page_vector(obj_request->copyup_pages, page_count); | 2295 | rbd_assert(pages != NULL); |
2132 | obj_request->copyup_pages = NULL; | 2296 | obj_request->copyup_pages = NULL; |
2297 | page_count = obj_request->copyup_page_count; | ||
2298 | rbd_assert(page_count); | ||
2299 | obj_request->copyup_page_count = 0; | ||
2300 | ceph_release_page_vector(pages, page_count); | ||
2133 | 2301 | ||
2134 | /* | 2302 | /* |
2135 | * We want the transfer count to reflect the size of the | 2303 | * We want the transfer count to reflect the size of the |
@@ -2153,9 +2321,11 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2153 | struct ceph_osd_client *osdc; | 2321 | struct ceph_osd_client *osdc; |
2154 | struct rbd_device *rbd_dev; | 2322 | struct rbd_device *rbd_dev; |
2155 | struct page **pages; | 2323 | struct page **pages; |
2156 | int result; | 2324 | u32 page_count; |
2157 | u64 obj_size; | 2325 | int img_result; |
2158 | u64 xferred; | 2326 | u64 parent_length; |
2327 | u64 offset; | ||
2328 | u64 length; | ||
2159 | 2329 | ||
2160 | rbd_assert(img_request_child_test(img_request)); | 2330 | rbd_assert(img_request_child_test(img_request)); |
2161 | 2331 | ||
@@ -2164,46 +2334,74 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2164 | pages = img_request->copyup_pages; | 2334 | pages = img_request->copyup_pages; |
2165 | rbd_assert(pages != NULL); | 2335 | rbd_assert(pages != NULL); |
2166 | img_request->copyup_pages = NULL; | 2336 | img_request->copyup_pages = NULL; |
2337 | page_count = img_request->copyup_page_count; | ||
2338 | rbd_assert(page_count); | ||
2339 | img_request->copyup_page_count = 0; | ||
2167 | 2340 | ||
2168 | orig_request = img_request->obj_request; | 2341 | orig_request = img_request->obj_request; |
2169 | rbd_assert(orig_request != NULL); | 2342 | rbd_assert(orig_request != NULL); |
2170 | rbd_assert(orig_request->type == OBJ_REQUEST_BIO); | 2343 | rbd_assert(obj_request_type_valid(orig_request->type)); |
2171 | result = img_request->result; | 2344 | img_result = img_request->result; |
2172 | obj_size = img_request->length; | 2345 | parent_length = img_request->length; |
2173 | xferred = img_request->xferred; | 2346 | rbd_assert(parent_length == img_request->xferred); |
2347 | rbd_img_request_put(img_request); | ||
2174 | 2348 | ||
2175 | rbd_dev = img_request->rbd_dev; | 2349 | rbd_assert(orig_request->img_request); |
2350 | rbd_dev = orig_request->img_request->rbd_dev; | ||
2176 | rbd_assert(rbd_dev); | 2351 | rbd_assert(rbd_dev); |
2177 | rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order); | ||
2178 | 2352 | ||
2179 | rbd_img_request_put(img_request); | 2353 | /* |
2354 | * If the overlap has become 0 (most likely because the | ||
2355 | * image has been flattened) we need to free the pages | ||
2356 | * and re-submit the original write request. | ||
2357 | */ | ||
2358 | if (!rbd_dev->parent_overlap) { | ||
2359 | struct ceph_osd_client *osdc; | ||
2180 | 2360 | ||
2181 | if (result) | 2361 | ceph_release_page_vector(pages, page_count); |
2182 | goto out_err; | 2362 | osdc = &rbd_dev->rbd_client->client->osdc; |
2363 | img_result = rbd_obj_request_submit(osdc, orig_request); | ||
2364 | if (!img_result) | ||
2365 | return; | ||
2366 | } | ||
2183 | 2367 | ||
2184 | /* Allocate the new copyup osd request for the original request */ | 2368 | if (img_result) |
2369 | goto out_err; | ||
2185 | 2370 | ||
2186 | result = -ENOMEM; | 2371 | /* |
2187 | rbd_assert(!orig_request->osd_req); | 2372 | * The original osd request is of no use to use any more. |
2373 | * We need a new one that can hold the two ops in a copyup | ||
2374 | * request. Allocate the new copyup osd request for the | ||
2375 | * original request, and release the old one. | ||
2376 | */ | ||
2377 | img_result = -ENOMEM; | ||
2188 | osd_req = rbd_osd_req_create_copyup(orig_request); | 2378 | osd_req = rbd_osd_req_create_copyup(orig_request); |
2189 | if (!osd_req) | 2379 | if (!osd_req) |
2190 | goto out_err; | 2380 | goto out_err; |
2381 | rbd_osd_req_destroy(orig_request->osd_req); | ||
2191 | orig_request->osd_req = osd_req; | 2382 | orig_request->osd_req = osd_req; |
2192 | orig_request->copyup_pages = pages; | 2383 | orig_request->copyup_pages = pages; |
2384 | orig_request->copyup_page_count = page_count; | ||
2193 | 2385 | ||
2194 | /* Initialize the copyup op */ | 2386 | /* Initialize the copyup op */ |
2195 | 2387 | ||
2196 | osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); | 2388 | osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); |
2197 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0, | 2389 | osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0, |
2198 | false, false); | 2390 | false, false); |
2199 | 2391 | ||
2200 | /* Then the original write request op */ | 2392 | /* Then the original write request op */ |
2201 | 2393 | ||
2394 | offset = orig_request->offset; | ||
2395 | length = orig_request->length; | ||
2202 | osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, | 2396 | osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE, |
2203 | orig_request->offset, | 2397 | offset, length, 0, 0); |
2204 | orig_request->length, 0, 0); | 2398 | if (orig_request->type == OBJ_REQUEST_BIO) |
2205 | osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list, | 2399 | osd_req_op_extent_osd_data_bio(osd_req, 1, |
2206 | orig_request->length); | 2400 | orig_request->bio_list, length); |
2401 | else | ||
2402 | osd_req_op_extent_osd_data_pages(osd_req, 1, | ||
2403 | orig_request->pages, length, | ||
2404 | offset & ~PAGE_MASK, false, false); | ||
2207 | 2405 | ||
2208 | rbd_osd_req_format_write(orig_request); | 2406 | rbd_osd_req_format_write(orig_request); |
2209 | 2407 | ||
@@ -2211,13 +2409,13 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request) | |||
2211 | 2409 | ||
2212 | orig_request->callback = rbd_img_obj_copyup_callback; | 2410 | orig_request->callback = rbd_img_obj_copyup_callback; |
2213 | osdc = &rbd_dev->rbd_client->client->osdc; | 2411 | osdc = &rbd_dev->rbd_client->client->osdc; |
2214 | result = rbd_obj_request_submit(osdc, orig_request); | 2412 | img_result = rbd_obj_request_submit(osdc, orig_request); |
2215 | if (!result) | 2413 | if (!img_result) |
2216 | return; | 2414 | return; |
2217 | out_err: | 2415 | out_err: |
2218 | /* Record the error code and complete the request */ | 2416 | /* Record the error code and complete the request */ |
2219 | 2417 | ||
2220 | orig_request->result = result; | 2418 | orig_request->result = img_result; |
2221 | orig_request->xferred = 0; | 2419 | orig_request->xferred = 0; |
2222 | obj_request_done_set(orig_request); | 2420 | obj_request_done_set(orig_request); |
2223 | rbd_obj_request_complete(orig_request); | 2421 | rbd_obj_request_complete(orig_request); |
@@ -2249,7 +2447,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
2249 | int result; | 2447 | int result; |
2250 | 2448 | ||
2251 | rbd_assert(obj_request_img_data_test(obj_request)); | 2449 | rbd_assert(obj_request_img_data_test(obj_request)); |
2252 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2450 | rbd_assert(obj_request_type_valid(obj_request->type)); |
2253 | 2451 | ||
2254 | img_request = obj_request->img_request; | 2452 | img_request = obj_request->img_request; |
2255 | rbd_assert(img_request != NULL); | 2453 | rbd_assert(img_request != NULL); |
@@ -2257,15 +2455,6 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
2257 | rbd_assert(rbd_dev->parent != NULL); | 2455 | rbd_assert(rbd_dev->parent != NULL); |
2258 | 2456 | ||
2259 | /* | 2457 | /* |
2260 | * First things first. The original osd request is of no | ||
2261 | * use to use any more, we'll need a new one that can hold | ||
2262 | * the two ops in a copyup request. We'll get that later, | ||
2263 | * but for now we can release the old one. | ||
2264 | */ | ||
2265 | rbd_osd_req_destroy(obj_request->osd_req); | ||
2266 | obj_request->osd_req = NULL; | ||
2267 | |||
2268 | /* | ||
2269 | * Determine the byte range covered by the object in the | 2458 | * Determine the byte range covered by the object in the |
2270 | * child image to which the original request was to be sent. | 2459 | * child image to which the original request was to be sent. |
2271 | */ | 2460 | */ |
@@ -2295,18 +2484,16 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
2295 | } | 2484 | } |
2296 | 2485 | ||
2297 | result = -ENOMEM; | 2486 | result = -ENOMEM; |
2298 | parent_request = rbd_img_request_create(rbd_dev->parent, | 2487 | parent_request = rbd_parent_request_create(obj_request, |
2299 | img_offset, length, | 2488 | img_offset, length); |
2300 | false, true); | ||
2301 | if (!parent_request) | 2489 | if (!parent_request) |
2302 | goto out_err; | 2490 | goto out_err; |
2303 | rbd_obj_request_get(obj_request); | ||
2304 | parent_request->obj_request = obj_request; | ||
2305 | 2491 | ||
2306 | result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); | 2492 | result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages); |
2307 | if (result) | 2493 | if (result) |
2308 | goto out_err; | 2494 | goto out_err; |
2309 | parent_request->copyup_pages = pages; | 2495 | parent_request->copyup_pages = pages; |
2496 | parent_request->copyup_page_count = page_count; | ||
2310 | 2497 | ||
2311 | parent_request->callback = rbd_img_obj_parent_read_full_callback; | 2498 | parent_request->callback = rbd_img_obj_parent_read_full_callback; |
2312 | result = rbd_img_request_submit(parent_request); | 2499 | result = rbd_img_request_submit(parent_request); |
@@ -2314,6 +2501,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request) | |||
2314 | return 0; | 2501 | return 0; |
2315 | 2502 | ||
2316 | parent_request->copyup_pages = NULL; | 2503 | parent_request->copyup_pages = NULL; |
2504 | parent_request->copyup_page_count = 0; | ||
2317 | parent_request->obj_request = NULL; | 2505 | parent_request->obj_request = NULL; |
2318 | rbd_obj_request_put(obj_request); | 2506 | rbd_obj_request_put(obj_request); |
2319 | out_err: | 2507 | out_err: |
@@ -2331,6 +2519,7 @@ out_err: | |||
2331 | static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) | 2519 | static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) |
2332 | { | 2520 | { |
2333 | struct rbd_obj_request *orig_request; | 2521 | struct rbd_obj_request *orig_request; |
2522 | struct rbd_device *rbd_dev; | ||
2334 | int result; | 2523 | int result; |
2335 | 2524 | ||
2336 | rbd_assert(!obj_request_img_data_test(obj_request)); | 2525 | rbd_assert(!obj_request_img_data_test(obj_request)); |
@@ -2353,8 +2542,21 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request) | |||
2353 | obj_request->xferred, obj_request->length); | 2542 | obj_request->xferred, obj_request->length); |
2354 | rbd_obj_request_put(obj_request); | 2543 | rbd_obj_request_put(obj_request); |
2355 | 2544 | ||
2356 | rbd_assert(orig_request); | 2545 | /* |
2357 | rbd_assert(orig_request->img_request); | 2546 | * If the overlap has become 0 (most likely because the |
2547 | * image has been flattened) we need to free the pages | ||
2548 | * and re-submit the original write request. | ||
2549 | */ | ||
2550 | rbd_dev = orig_request->img_request->rbd_dev; | ||
2551 | if (!rbd_dev->parent_overlap) { | ||
2552 | struct ceph_osd_client *osdc; | ||
2553 | |||
2554 | rbd_obj_request_put(orig_request); | ||
2555 | osdc = &rbd_dev->rbd_client->client->osdc; | ||
2556 | result = rbd_obj_request_submit(osdc, orig_request); | ||
2557 | if (!result) | ||
2558 | return; | ||
2559 | } | ||
2358 | 2560 | ||
2359 | /* | 2561 | /* |
2360 | * Our only purpose here is to determine whether the object | 2562 | * Our only purpose here is to determine whether the object |
@@ -2512,14 +2714,36 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
2512 | struct rbd_obj_request *obj_request; | 2714 | struct rbd_obj_request *obj_request; |
2513 | struct rbd_device *rbd_dev; | 2715 | struct rbd_device *rbd_dev; |
2514 | u64 obj_end; | 2716 | u64 obj_end; |
2717 | u64 img_xferred; | ||
2718 | int img_result; | ||
2515 | 2719 | ||
2516 | rbd_assert(img_request_child_test(img_request)); | 2720 | rbd_assert(img_request_child_test(img_request)); |
2517 | 2721 | ||
2722 | /* First get what we need from the image request and release it */ | ||
2723 | |||
2518 | obj_request = img_request->obj_request; | 2724 | obj_request = img_request->obj_request; |
2725 | img_xferred = img_request->xferred; | ||
2726 | img_result = img_request->result; | ||
2727 | rbd_img_request_put(img_request); | ||
2728 | |||
2729 | /* | ||
2730 | * If the overlap has become 0 (most likely because the | ||
2731 | * image has been flattened) we need to re-submit the | ||
2732 | * original request. | ||
2733 | */ | ||
2519 | rbd_assert(obj_request); | 2734 | rbd_assert(obj_request); |
2520 | rbd_assert(obj_request->img_request); | 2735 | rbd_assert(obj_request->img_request); |
2736 | rbd_dev = obj_request->img_request->rbd_dev; | ||
2737 | if (!rbd_dev->parent_overlap) { | ||
2738 | struct ceph_osd_client *osdc; | ||
2521 | 2739 | ||
2522 | obj_request->result = img_request->result; | 2740 | osdc = &rbd_dev->rbd_client->client->osdc; |
2741 | img_result = rbd_obj_request_submit(osdc, obj_request); | ||
2742 | if (!img_result) | ||
2743 | return; | ||
2744 | } | ||
2745 | |||
2746 | obj_request->result = img_result; | ||
2523 | if (obj_request->result) | 2747 | if (obj_request->result) |
2524 | goto out; | 2748 | goto out; |
2525 | 2749 | ||
@@ -2532,7 +2756,6 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
2532 | */ | 2756 | */ |
2533 | rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); | 2757 | rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); |
2534 | obj_end = obj_request->img_offset + obj_request->length; | 2758 | obj_end = obj_request->img_offset + obj_request->length; |
2535 | rbd_dev = obj_request->img_request->rbd_dev; | ||
2536 | if (obj_end > rbd_dev->parent_overlap) { | 2759 | if (obj_end > rbd_dev->parent_overlap) { |
2537 | u64 xferred = 0; | 2760 | u64 xferred = 0; |
2538 | 2761 | ||
@@ -2540,43 +2763,39 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request) | |||
2540 | xferred = rbd_dev->parent_overlap - | 2763 | xferred = rbd_dev->parent_overlap - |
2541 | obj_request->img_offset; | 2764 | obj_request->img_offset; |
2542 | 2765 | ||
2543 | obj_request->xferred = min(img_request->xferred, xferred); | 2766 | obj_request->xferred = min(img_xferred, xferred); |
2544 | } else { | 2767 | } else { |
2545 | obj_request->xferred = img_request->xferred; | 2768 | obj_request->xferred = img_xferred; |
2546 | } | 2769 | } |
2547 | out: | 2770 | out: |
2548 | rbd_img_request_put(img_request); | ||
2549 | rbd_img_obj_request_read_callback(obj_request); | 2771 | rbd_img_obj_request_read_callback(obj_request); |
2550 | rbd_obj_request_complete(obj_request); | 2772 | rbd_obj_request_complete(obj_request); |
2551 | } | 2773 | } |
2552 | 2774 | ||
2553 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request) | 2775 | static void rbd_img_parent_read(struct rbd_obj_request *obj_request) |
2554 | { | 2776 | { |
2555 | struct rbd_device *rbd_dev; | ||
2556 | struct rbd_img_request *img_request; | 2777 | struct rbd_img_request *img_request; |
2557 | int result; | 2778 | int result; |
2558 | 2779 | ||
2559 | rbd_assert(obj_request_img_data_test(obj_request)); | 2780 | rbd_assert(obj_request_img_data_test(obj_request)); |
2560 | rbd_assert(obj_request->img_request != NULL); | 2781 | rbd_assert(obj_request->img_request != NULL); |
2561 | rbd_assert(obj_request->result == (s32) -ENOENT); | 2782 | rbd_assert(obj_request->result == (s32) -ENOENT); |
2562 | rbd_assert(obj_request->type == OBJ_REQUEST_BIO); | 2783 | rbd_assert(obj_request_type_valid(obj_request->type)); |
2563 | 2784 | ||
2564 | rbd_dev = obj_request->img_request->rbd_dev; | ||
2565 | rbd_assert(rbd_dev->parent != NULL); | ||
2566 | /* rbd_read_finish(obj_request, obj_request->length); */ | 2785 | /* rbd_read_finish(obj_request, obj_request->length); */ |
2567 | img_request = rbd_img_request_create(rbd_dev->parent, | 2786 | img_request = rbd_parent_request_create(obj_request, |
2568 | obj_request->img_offset, | 2787 | obj_request->img_offset, |
2569 | obj_request->length, | 2788 | obj_request->length); |
2570 | false, true); | ||
2571 | result = -ENOMEM; | 2789 | result = -ENOMEM; |
2572 | if (!img_request) | 2790 | if (!img_request) |
2573 | goto out_err; | 2791 | goto out_err; |
2574 | 2792 | ||
2575 | rbd_obj_request_get(obj_request); | 2793 | if (obj_request->type == OBJ_REQUEST_BIO) |
2576 | img_request->obj_request = obj_request; | 2794 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, |
2577 | 2795 | obj_request->bio_list); | |
2578 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, | 2796 | else |
2579 | obj_request->bio_list); | 2797 | result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES, |
2798 | obj_request->pages); | ||
2580 | if (result) | 2799 | if (result) |
2581 | goto out_err; | 2800 | goto out_err; |
2582 | 2801 | ||
@@ -2626,6 +2845,7 @@ out: | |||
2626 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | 2845 | static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) |
2627 | { | 2846 | { |
2628 | struct rbd_device *rbd_dev = (struct rbd_device *)data; | 2847 | struct rbd_device *rbd_dev = (struct rbd_device *)data; |
2848 | int ret; | ||
2629 | 2849 | ||
2630 | if (!rbd_dev) | 2850 | if (!rbd_dev) |
2631 | return; | 2851 | return; |
@@ -2633,7 +2853,9 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
2633 | dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, | 2853 | dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, |
2634 | rbd_dev->header_name, (unsigned long long)notify_id, | 2854 | rbd_dev->header_name, (unsigned long long)notify_id, |
2635 | (unsigned int)opcode); | 2855 | (unsigned int)opcode); |
2636 | (void)rbd_dev_refresh(rbd_dev); | 2856 | ret = rbd_dev_refresh(rbd_dev); |
2857 | if (ret) | ||
2858 | rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret); | ||
2637 | 2859 | ||
2638 | rbd_obj_notify_ack(rbd_dev, notify_id); | 2860 | rbd_obj_notify_ack(rbd_dev, notify_id); |
2639 | } | 2861 | } |
@@ -2642,7 +2864,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) | |||
2642 | * Request sync osd watch/unwatch. The value of "start" determines | 2864 | * Request sync osd watch/unwatch. The value of "start" determines |
2643 | * whether a watch request is being initiated or torn down. | 2865 | * whether a watch request is being initiated or torn down. |
2644 | */ | 2866 | */ |
2645 | static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) | 2867 | static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start) |
2646 | { | 2868 | { |
2647 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; | 2869 | struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; |
2648 | struct rbd_obj_request *obj_request; | 2870 | struct rbd_obj_request *obj_request; |
@@ -2676,7 +2898,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) | |||
2676 | rbd_dev->watch_request->osd_req); | 2898 | rbd_dev->watch_request->osd_req); |
2677 | 2899 | ||
2678 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, | 2900 | osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, |
2679 | rbd_dev->watch_event->cookie, 0, start); | 2901 | rbd_dev->watch_event->cookie, 0, start ? 1 : 0); |
2680 | rbd_osd_req_format_write(obj_request); | 2902 | rbd_osd_req_format_write(obj_request); |
2681 | 2903 | ||
2682 | ret = rbd_obj_request_submit(osdc, obj_request); | 2904 | ret = rbd_obj_request_submit(osdc, obj_request); |
@@ -2869,9 +3091,16 @@ static void rbd_request_fn(struct request_queue *q) | |||
2869 | goto end_request; /* Shouldn't happen */ | 3091 | goto end_request; /* Shouldn't happen */ |
2870 | } | 3092 | } |
2871 | 3093 | ||
3094 | result = -EIO; | ||
3095 | if (offset + length > rbd_dev->mapping.size) { | ||
3096 | rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n", | ||
3097 | offset, length, rbd_dev->mapping.size); | ||
3098 | goto end_request; | ||
3099 | } | ||
3100 | |||
2872 | result = -ENOMEM; | 3101 | result = -ENOMEM; |
2873 | img_request = rbd_img_request_create(rbd_dev, offset, length, | 3102 | img_request = rbd_img_request_create(rbd_dev, offset, length, |
2874 | write_request, false); | 3103 | write_request); |
2875 | if (!img_request) | 3104 | if (!img_request) |
2876 | goto end_request; | 3105 | goto end_request; |
2877 | 3106 | ||
@@ -3022,17 +3251,11 @@ out: | |||
3022 | } | 3251 | } |
3023 | 3252 | ||
3024 | /* | 3253 | /* |
3025 | * Read the complete header for the given rbd device. | 3254 | * Read the complete header for the given rbd device. On successful |
3026 | * | 3255 | * return, the rbd_dev->header field will contain up-to-date |
3027 | * Returns a pointer to a dynamically-allocated buffer containing | 3256 | * information about the image. |
3028 | * the complete and validated header. Caller can pass the address | ||
3029 | * of a variable that will be filled in with the version of the | ||
3030 | * header object at the time it was read. | ||
3031 | * | ||
3032 | * Returns a pointer-coded errno if a failure occurs. | ||
3033 | */ | 3257 | */ |
3034 | static struct rbd_image_header_ondisk * | 3258 | static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev) |
3035 | rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | ||
3036 | { | 3259 | { |
3037 | struct rbd_image_header_ondisk *ondisk = NULL; | 3260 | struct rbd_image_header_ondisk *ondisk = NULL; |
3038 | u32 snap_count = 0; | 3261 | u32 snap_count = 0; |
@@ -3057,22 +3280,22 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | |||
3057 | size += names_size; | 3280 | size += names_size; |
3058 | ondisk = kmalloc(size, GFP_KERNEL); | 3281 | ondisk = kmalloc(size, GFP_KERNEL); |
3059 | if (!ondisk) | 3282 | if (!ondisk) |
3060 | return ERR_PTR(-ENOMEM); | 3283 | return -ENOMEM; |
3061 | 3284 | ||
3062 | ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, | 3285 | ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, |
3063 | 0, size, ondisk); | 3286 | 0, size, ondisk); |
3064 | if (ret < 0) | 3287 | if (ret < 0) |
3065 | goto out_err; | 3288 | goto out; |
3066 | if ((size_t)ret < size) { | 3289 | if ((size_t)ret < size) { |
3067 | ret = -ENXIO; | 3290 | ret = -ENXIO; |
3068 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", | 3291 | rbd_warn(rbd_dev, "short header read (want %zd got %d)", |
3069 | size, ret); | 3292 | size, ret); |
3070 | goto out_err; | 3293 | goto out; |
3071 | } | 3294 | } |
3072 | if (!rbd_dev_ondisk_valid(ondisk)) { | 3295 | if (!rbd_dev_ondisk_valid(ondisk)) { |
3073 | ret = -ENXIO; | 3296 | ret = -ENXIO; |
3074 | rbd_warn(rbd_dev, "invalid header"); | 3297 | rbd_warn(rbd_dev, "invalid header"); |
3075 | goto out_err; | 3298 | goto out; |
3076 | } | 3299 | } |
3077 | 3300 | ||
3078 | names_size = le64_to_cpu(ondisk->snap_names_len); | 3301 | names_size = le64_to_cpu(ondisk->snap_names_len); |
@@ -3080,85 +3303,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev) | |||
3080 | snap_count = le32_to_cpu(ondisk->snap_count); | 3303 | snap_count = le32_to_cpu(ondisk->snap_count); |
3081 | } while (snap_count != want_count); | 3304 | } while (snap_count != want_count); |
3082 | 3305 | ||
3083 | return ondisk; | 3306 | ret = rbd_header_from_disk(rbd_dev, ondisk); |
3084 | 3307 | out: | |
3085 | out_err: | ||
3086 | kfree(ondisk); | ||
3087 | |||
3088 | return ERR_PTR(ret); | ||
3089 | } | ||
3090 | |||
3091 | /* | ||
3092 | * reload the ondisk the header | ||
3093 | */ | ||
3094 | static int rbd_read_header(struct rbd_device *rbd_dev, | ||
3095 | struct rbd_image_header *header) | ||
3096 | { | ||
3097 | struct rbd_image_header_ondisk *ondisk; | ||
3098 | int ret; | ||
3099 | |||
3100 | ondisk = rbd_dev_v1_header_read(rbd_dev); | ||
3101 | if (IS_ERR(ondisk)) | ||
3102 | return PTR_ERR(ondisk); | ||
3103 | ret = rbd_header_from_disk(header, ondisk); | ||
3104 | kfree(ondisk); | 3308 | kfree(ondisk); |
3105 | 3309 | ||
3106 | return ret; | 3310 | return ret; |
3107 | } | 3311 | } |
3108 | 3312 | ||
3109 | static void rbd_update_mapping_size(struct rbd_device *rbd_dev) | ||
3110 | { | ||
3111 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) | ||
3112 | return; | ||
3113 | |||
3114 | if (rbd_dev->mapping.size != rbd_dev->header.image_size) { | ||
3115 | sector_t size; | ||
3116 | |||
3117 | rbd_dev->mapping.size = rbd_dev->header.image_size; | ||
3118 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; | ||
3119 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3120 | set_capacity(rbd_dev->disk, size); | ||
3121 | } | ||
3122 | } | ||
3123 | |||
3124 | /* | ||
3125 | * only read the first part of the ondisk header, without the snaps info | ||
3126 | */ | ||
3127 | static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev) | ||
3128 | { | ||
3129 | int ret; | ||
3130 | struct rbd_image_header h; | ||
3131 | |||
3132 | ret = rbd_read_header(rbd_dev, &h); | ||
3133 | if (ret < 0) | ||
3134 | return ret; | ||
3135 | |||
3136 | down_write(&rbd_dev->header_rwsem); | ||
3137 | |||
3138 | /* Update image size, and check for resize of mapped image */ | ||
3139 | rbd_dev->header.image_size = h.image_size; | ||
3140 | rbd_update_mapping_size(rbd_dev); | ||
3141 | |||
3142 | /* rbd_dev->header.object_prefix shouldn't change */ | ||
3143 | kfree(rbd_dev->header.snap_sizes); | ||
3144 | kfree(rbd_dev->header.snap_names); | ||
3145 | /* osd requests may still refer to snapc */ | ||
3146 | ceph_put_snap_context(rbd_dev->header.snapc); | ||
3147 | |||
3148 | rbd_dev->header.image_size = h.image_size; | ||
3149 | rbd_dev->header.snapc = h.snapc; | ||
3150 | rbd_dev->header.snap_names = h.snap_names; | ||
3151 | rbd_dev->header.snap_sizes = h.snap_sizes; | ||
3152 | /* Free the extra copy of the object prefix */ | ||
3153 | if (strcmp(rbd_dev->header.object_prefix, h.object_prefix)) | ||
3154 | rbd_warn(rbd_dev, "object prefix changed (ignoring)"); | ||
3155 | kfree(h.object_prefix); | ||
3156 | |||
3157 | up_write(&rbd_dev->header_rwsem); | ||
3158 | |||
3159 | return ret; | ||
3160 | } | ||
3161 | |||
3162 | /* | 3313 | /* |
3163 | * Clear the rbd device's EXISTS flag if the snapshot it's mapped to | 3314 | * Clear the rbd device's EXISTS flag if the snapshot it's mapped to |
3164 | * has disappeared from the (just updated) snapshot context. | 3315 | * has disappeared from the (just updated) snapshot context. |
@@ -3180,26 +3331,29 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev) | |||
3180 | 3331 | ||
3181 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) | 3332 | static int rbd_dev_refresh(struct rbd_device *rbd_dev) |
3182 | { | 3333 | { |
3183 | u64 image_size; | 3334 | u64 mapping_size; |
3184 | int ret; | 3335 | int ret; |
3185 | 3336 | ||
3186 | rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); | 3337 | rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); |
3187 | image_size = rbd_dev->header.image_size; | 3338 | mapping_size = rbd_dev->mapping.size; |
3188 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); | 3339 | mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); |
3189 | if (rbd_dev->image_format == 1) | 3340 | if (rbd_dev->image_format == 1) |
3190 | ret = rbd_dev_v1_refresh(rbd_dev); | 3341 | ret = rbd_dev_v1_header_info(rbd_dev); |
3191 | else | 3342 | else |
3192 | ret = rbd_dev_v2_refresh(rbd_dev); | 3343 | ret = rbd_dev_v2_header_info(rbd_dev); |
3193 | 3344 | ||
3194 | /* If it's a mapped snapshot, validate its EXISTS flag */ | 3345 | /* If it's a mapped snapshot, validate its EXISTS flag */ |
3195 | 3346 | ||
3196 | rbd_exists_validate(rbd_dev); | 3347 | rbd_exists_validate(rbd_dev); |
3197 | mutex_unlock(&ctl_mutex); | 3348 | mutex_unlock(&ctl_mutex); |
3198 | if (ret) | 3349 | if (mapping_size != rbd_dev->mapping.size) { |
3199 | rbd_warn(rbd_dev, "got notification but failed to " | 3350 | sector_t size; |
3200 | " update snaps: %d\n", ret); | 3351 | |
3201 | if (image_size != rbd_dev->header.image_size) | 3352 | size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; |
3353 | dout("setting size to %llu sectors", (unsigned long long)size); | ||
3354 | set_capacity(rbd_dev->disk, size); | ||
3202 | revalidate_disk(rbd_dev->disk); | 3355 | revalidate_disk(rbd_dev->disk); |
3356 | } | ||
3203 | 3357 | ||
3204 | return ret; | 3358 | return ret; |
3205 | } | 3359 | } |
@@ -3403,6 +3557,8 @@ static ssize_t rbd_image_refresh(struct device *dev, | |||
3403 | int ret; | 3557 | int ret; |
3404 | 3558 | ||
3405 | ret = rbd_dev_refresh(rbd_dev); | 3559 | ret = rbd_dev_refresh(rbd_dev); |
3560 | if (ret) | ||
3561 | rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret); | ||
3406 | 3562 | ||
3407 | return ret < 0 ? ret : size; | 3563 | return ret < 0 ? ret : size; |
3408 | } | 3564 | } |
@@ -3501,6 +3657,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, | |||
3501 | 3657 | ||
3502 | spin_lock_init(&rbd_dev->lock); | 3658 | spin_lock_init(&rbd_dev->lock); |
3503 | rbd_dev->flags = 0; | 3659 | rbd_dev->flags = 0; |
3660 | atomic_set(&rbd_dev->parent_ref, 0); | ||
3504 | INIT_LIST_HEAD(&rbd_dev->node); | 3661 | INIT_LIST_HEAD(&rbd_dev->node); |
3505 | init_rwsem(&rbd_dev->header_rwsem); | 3662 | init_rwsem(&rbd_dev->header_rwsem); |
3506 | 3663 | ||
@@ -3650,6 +3807,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
3650 | __le64 snapid; | 3807 | __le64 snapid; |
3651 | void *p; | 3808 | void *p; |
3652 | void *end; | 3809 | void *end; |
3810 | u64 pool_id; | ||
3653 | char *image_id; | 3811 | char *image_id; |
3654 | u64 overlap; | 3812 | u64 overlap; |
3655 | int ret; | 3813 | int ret; |
@@ -3680,18 +3838,37 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
3680 | p = reply_buf; | 3838 | p = reply_buf; |
3681 | end = reply_buf + ret; | 3839 | end = reply_buf + ret; |
3682 | ret = -ERANGE; | 3840 | ret = -ERANGE; |
3683 | ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); | 3841 | ceph_decode_64_safe(&p, end, pool_id, out_err); |
3684 | if (parent_spec->pool_id == CEPH_NOPOOL) | 3842 | if (pool_id == CEPH_NOPOOL) { |
3843 | /* | ||
3844 | * Either the parent never existed, or we have | ||
3845 | * record of it but the image got flattened so it no | ||
3846 | * longer has a parent. When the parent of a | ||
3847 | * layered image disappears we immediately set the | ||
3848 | * overlap to 0. The effect of this is that all new | ||
3849 | * requests will be treated as if the image had no | ||
3850 | * parent. | ||
3851 | */ | ||
3852 | if (rbd_dev->parent_overlap) { | ||
3853 | rbd_dev->parent_overlap = 0; | ||
3854 | smp_mb(); | ||
3855 | rbd_dev_parent_put(rbd_dev); | ||
3856 | pr_info("%s: clone image has been flattened\n", | ||
3857 | rbd_dev->disk->disk_name); | ||
3858 | } | ||
3859 | |||
3685 | goto out; /* No parent? No problem. */ | 3860 | goto out; /* No parent? No problem. */ |
3861 | } | ||
3686 | 3862 | ||
3687 | /* The ceph file layout needs to fit pool id in 32 bits */ | 3863 | /* The ceph file layout needs to fit pool id in 32 bits */ |
3688 | 3864 | ||
3689 | ret = -EIO; | 3865 | ret = -EIO; |
3690 | if (parent_spec->pool_id > (u64)U32_MAX) { | 3866 | if (pool_id > (u64)U32_MAX) { |
3691 | rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", | 3867 | rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", |
3692 | (unsigned long long)parent_spec->pool_id, U32_MAX); | 3868 | (unsigned long long)pool_id, U32_MAX); |
3693 | goto out_err; | 3869 | goto out_err; |
3694 | } | 3870 | } |
3871 | parent_spec->pool_id = pool_id; | ||
3695 | 3872 | ||
3696 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); | 3873 | image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); |
3697 | if (IS_ERR(image_id)) { | 3874 | if (IS_ERR(image_id)) { |
@@ -3702,9 +3879,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) | |||
3702 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); | 3879 | ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); |
3703 | ceph_decode_64_safe(&p, end, overlap, out_err); | 3880 | ceph_decode_64_safe(&p, end, overlap, out_err); |
3704 | 3881 | ||
3705 | rbd_dev->parent_overlap = overlap; | 3882 | if (overlap) { |
3706 | rbd_dev->parent_spec = parent_spec; | 3883 | rbd_spec_put(rbd_dev->parent_spec); |
3707 | parent_spec = NULL; /* rbd_dev now owns this */ | 3884 | rbd_dev->parent_spec = parent_spec; |
3885 | parent_spec = NULL; /* rbd_dev now owns this */ | ||
3886 | rbd_dev->parent_overlap = overlap; | ||
3887 | } else { | ||
3888 | rbd_warn(rbd_dev, "ignoring parent of clone with overlap 0\n"); | ||
3889 | } | ||
3708 | out: | 3890 | out: |
3709 | ret = 0; | 3891 | ret = 0; |
3710 | out_err: | 3892 | out_err: |
@@ -4002,6 +4184,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev) | |||
4002 | for (i = 0; i < snap_count; i++) | 4184 | for (i = 0; i < snap_count; i++) |
4003 | snapc->snaps[i] = ceph_decode_64(&p); | 4185 | snapc->snaps[i] = ceph_decode_64(&p); |
4004 | 4186 | ||
4187 | ceph_put_snap_context(rbd_dev->header.snapc); | ||
4005 | rbd_dev->header.snapc = snapc; | 4188 | rbd_dev->header.snapc = snapc; |
4006 | 4189 | ||
4007 | dout(" snap context seq = %llu, snap_count = %u\n", | 4190 | dout(" snap context seq = %llu, snap_count = %u\n", |
@@ -4053,21 +4236,56 @@ out: | |||
4053 | return snap_name; | 4236 | return snap_name; |
4054 | } | 4237 | } |
4055 | 4238 | ||
4056 | static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev) | 4239 | static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev) |
4057 | { | 4240 | { |
4241 | bool first_time = rbd_dev->header.object_prefix == NULL; | ||
4058 | int ret; | 4242 | int ret; |
4059 | 4243 | ||
4060 | down_write(&rbd_dev->header_rwsem); | 4244 | down_write(&rbd_dev->header_rwsem); |
4061 | 4245 | ||
4246 | if (first_time) { | ||
4247 | ret = rbd_dev_v2_header_onetime(rbd_dev); | ||
4248 | if (ret) | ||
4249 | goto out; | ||
4250 | } | ||
4251 | |||
4252 | /* | ||
4253 | * If the image supports layering, get the parent info. We | ||
4254 | * need to probe the first time regardless. Thereafter we | ||
4255 | * only need to if there's a parent, to see if it has | ||
4256 | * disappeared due to the mapped image getting flattened. | ||
4257 | */ | ||
4258 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING && | ||
4259 | (first_time || rbd_dev->parent_spec)) { | ||
4260 | bool warn; | ||
4261 | |||
4262 | ret = rbd_dev_v2_parent_info(rbd_dev); | ||
4263 | if (ret) | ||
4264 | goto out; | ||
4265 | |||
4266 | /* | ||
4267 | * Print a warning if this is the initial probe and | ||
4268 | * the image has a parent. Don't print it if the | ||
4269 | * image now being probed is itself a parent. We | ||
4270 | * can tell at this point because we won't know its | ||
4271 | * pool name yet (just its pool id). | ||
4272 | */ | ||
4273 | warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name; | ||
4274 | if (first_time && warn) | ||
4275 | rbd_warn(rbd_dev, "WARNING: kernel layering " | ||
4276 | "is EXPERIMENTAL!"); | ||
4277 | } | ||
4278 | |||
4062 | ret = rbd_dev_v2_image_size(rbd_dev); | 4279 | ret = rbd_dev_v2_image_size(rbd_dev); |
4063 | if (ret) | 4280 | if (ret) |
4064 | goto out; | 4281 | goto out; |
4065 | rbd_update_mapping_size(rbd_dev); | 4282 | |
4283 | if (rbd_dev->spec->snap_id == CEPH_NOSNAP) | ||
4284 | if (rbd_dev->mapping.size != rbd_dev->header.image_size) | ||
4285 | rbd_dev->mapping.size = rbd_dev->header.image_size; | ||
4066 | 4286 | ||
4067 | ret = rbd_dev_v2_snap_context(rbd_dev); | 4287 | ret = rbd_dev_v2_snap_context(rbd_dev); |
4068 | dout("rbd_dev_v2_snap_context returned %d\n", ret); | 4288 | dout("rbd_dev_v2_snap_context returned %d\n", ret); |
4069 | if (ret) | ||
4070 | goto out; | ||
4071 | out: | 4289 | out: |
4072 | up_write(&rbd_dev->header_rwsem); | 4290 | up_write(&rbd_dev->header_rwsem); |
4073 | 4291 | ||
@@ -4484,16 +4702,18 @@ out: | |||
4484 | return ret; | 4702 | return ret; |
4485 | } | 4703 | } |
4486 | 4704 | ||
4487 | /* Undo whatever state changes are made by v1 or v2 image probe */ | 4705 | /* |
4488 | 4706 | * Undo whatever state changes are made by v1 or v2 header info | |
4707 | * call. | ||
4708 | */ | ||
4489 | static void rbd_dev_unprobe(struct rbd_device *rbd_dev) | 4709 | static void rbd_dev_unprobe(struct rbd_device *rbd_dev) |
4490 | { | 4710 | { |
4491 | struct rbd_image_header *header; | 4711 | struct rbd_image_header *header; |
4492 | 4712 | ||
4493 | rbd_dev_remove_parent(rbd_dev); | 4713 | /* Drop parent reference unless it's already been done (or none) */ |
4494 | rbd_spec_put(rbd_dev->parent_spec); | 4714 | |
4495 | rbd_dev->parent_spec = NULL; | 4715 | if (rbd_dev->parent_overlap) |
4496 | rbd_dev->parent_overlap = 0; | 4716 | rbd_dev_parent_put(rbd_dev); |
4497 | 4717 | ||
4498 | /* Free dynamic fields from the header, then zero it out */ | 4718 | /* Free dynamic fields from the header, then zero it out */ |
4499 | 4719 | ||
@@ -4505,72 +4725,22 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) | |||
4505 | memset(header, 0, sizeof (*header)); | 4725 | memset(header, 0, sizeof (*header)); |
4506 | } | 4726 | } |
4507 | 4727 | ||
4508 | static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) | 4728 | static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev) |
4509 | { | 4729 | { |
4510 | int ret; | 4730 | int ret; |
4511 | 4731 | ||
4512 | /* Populate rbd image metadata */ | ||
4513 | |||
4514 | ret = rbd_read_header(rbd_dev, &rbd_dev->header); | ||
4515 | if (ret < 0) | ||
4516 | goto out_err; | ||
4517 | |||
4518 | /* Version 1 images have no parent (no layering) */ | ||
4519 | |||
4520 | rbd_dev->parent_spec = NULL; | ||
4521 | rbd_dev->parent_overlap = 0; | ||
4522 | |||
4523 | dout("discovered version 1 image, header name is %s\n", | ||
4524 | rbd_dev->header_name); | ||
4525 | |||
4526 | return 0; | ||
4527 | |||
4528 | out_err: | ||
4529 | kfree(rbd_dev->header_name); | ||
4530 | rbd_dev->header_name = NULL; | ||
4531 | kfree(rbd_dev->spec->image_id); | ||
4532 | rbd_dev->spec->image_id = NULL; | ||
4533 | |||
4534 | return ret; | ||
4535 | } | ||
4536 | |||
4537 | static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | ||
4538 | { | ||
4539 | int ret; | ||
4540 | |||
4541 | ret = rbd_dev_v2_image_size(rbd_dev); | ||
4542 | if (ret) | ||
4543 | goto out_err; | ||
4544 | |||
4545 | /* Get the object prefix (a.k.a. block_name) for the image */ | ||
4546 | |||
4547 | ret = rbd_dev_v2_object_prefix(rbd_dev); | 4732 | ret = rbd_dev_v2_object_prefix(rbd_dev); |
4548 | if (ret) | 4733 | if (ret) |
4549 | goto out_err; | 4734 | goto out_err; |
4550 | 4735 | ||
4551 | /* Get the and check features for the image */ | 4736 | /* |
4552 | 4737 | * Get the and check features for the image. Currently the | |
4738 | * features are assumed to never change. | ||
4739 | */ | ||
4553 | ret = rbd_dev_v2_features(rbd_dev); | 4740 | ret = rbd_dev_v2_features(rbd_dev); |
4554 | if (ret) | 4741 | if (ret) |
4555 | goto out_err; | 4742 | goto out_err; |
4556 | 4743 | ||
4557 | /* If the image supports layering, get the parent info */ | ||
4558 | |||
4559 | if (rbd_dev->header.features & RBD_FEATURE_LAYERING) { | ||
4560 | ret = rbd_dev_v2_parent_info(rbd_dev); | ||
4561 | if (ret) | ||
4562 | goto out_err; | ||
4563 | |||
4564 | /* | ||
4565 | * Don't print a warning for parent images. We can | ||
4566 | * tell this point because we won't know its pool | ||
4567 | * name yet (just its pool id). | ||
4568 | */ | ||
4569 | if (rbd_dev->spec->pool_name) | ||
4570 | rbd_warn(rbd_dev, "WARNING: kernel layering " | ||
4571 | "is EXPERIMENTAL!"); | ||
4572 | } | ||
4573 | |||
4574 | /* If the image supports fancy striping, get its parameters */ | 4744 | /* If the image supports fancy striping, get its parameters */ |
4575 | 4745 | ||
4576 | if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { | 4746 | if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { |
@@ -4578,28 +4748,11 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) | |||
4578 | if (ret < 0) | 4748 | if (ret < 0) |
4579 | goto out_err; | 4749 | goto out_err; |
4580 | } | 4750 | } |
4581 | 4751 | /* No support for crypto and compression type format 2 images */ | |
4582 | /* crypto and compression type aren't (yet) supported for v2 images */ | ||
4583 | |||
4584 | rbd_dev->header.crypt_type = 0; | ||
4585 | rbd_dev->header.comp_type = 0; | ||
4586 | |||
4587 | /* Get the snapshot context, plus the header version */ | ||
4588 | |||
4589 | ret = rbd_dev_v2_snap_context(rbd_dev); | ||
4590 | if (ret) | ||
4591 | goto out_err; | ||
4592 | |||
4593 | dout("discovered version 2 image, header name is %s\n", | ||
4594 | rbd_dev->header_name); | ||
4595 | 4752 | ||
4596 | return 0; | 4753 | return 0; |
4597 | out_err: | 4754 | out_err: |
4598 | rbd_dev->parent_overlap = 0; | 4755 | rbd_dev->header.features = 0; |
4599 | rbd_spec_put(rbd_dev->parent_spec); | ||
4600 | rbd_dev->parent_spec = NULL; | ||
4601 | kfree(rbd_dev->header_name); | ||
4602 | rbd_dev->header_name = NULL; | ||
4603 | kfree(rbd_dev->header.object_prefix); | 4756 | kfree(rbd_dev->header.object_prefix); |
4604 | rbd_dev->header.object_prefix = NULL; | 4757 | rbd_dev->header.object_prefix = NULL; |
4605 | 4758 | ||
@@ -4628,15 +4781,16 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) | |||
4628 | if (!parent) | 4781 | if (!parent) |
4629 | goto out_err; | 4782 | goto out_err; |
4630 | 4783 | ||
4631 | ret = rbd_dev_image_probe(parent); | 4784 | ret = rbd_dev_image_probe(parent, false); |
4632 | if (ret < 0) | 4785 | if (ret < 0) |
4633 | goto out_err; | 4786 | goto out_err; |
4634 | rbd_dev->parent = parent; | 4787 | rbd_dev->parent = parent; |
4788 | atomic_set(&rbd_dev->parent_ref, 1); | ||
4635 | 4789 | ||
4636 | return 0; | 4790 | return 0; |
4637 | out_err: | 4791 | out_err: |
4638 | if (parent) { | 4792 | if (parent) { |
4639 | rbd_spec_put(rbd_dev->parent_spec); | 4793 | rbd_dev_unparent(rbd_dev); |
4640 | kfree(rbd_dev->header_name); | 4794 | kfree(rbd_dev->header_name); |
4641 | rbd_dev_destroy(parent); | 4795 | rbd_dev_destroy(parent); |
4642 | } else { | 4796 | } else { |
@@ -4651,10 +4805,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
4651 | { | 4805 | { |
4652 | int ret; | 4806 | int ret; |
4653 | 4807 | ||
4654 | ret = rbd_dev_mapping_set(rbd_dev); | ||
4655 | if (ret) | ||
4656 | return ret; | ||
4657 | |||
4658 | /* generate unique id: find highest unique id, add one */ | 4808 | /* generate unique id: find highest unique id, add one */ |
4659 | rbd_dev_id_get(rbd_dev); | 4809 | rbd_dev_id_get(rbd_dev); |
4660 | 4810 | ||
@@ -4676,13 +4826,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
4676 | if (ret) | 4826 | if (ret) |
4677 | goto err_out_blkdev; | 4827 | goto err_out_blkdev; |
4678 | 4828 | ||
4679 | ret = rbd_bus_add_dev(rbd_dev); | 4829 | ret = rbd_dev_mapping_set(rbd_dev); |
4680 | if (ret) | 4830 | if (ret) |
4681 | goto err_out_disk; | 4831 | goto err_out_disk; |
4832 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | ||
4833 | |||
4834 | ret = rbd_bus_add_dev(rbd_dev); | ||
4835 | if (ret) | ||
4836 | goto err_out_mapping; | ||
4682 | 4837 | ||
4683 | /* Everything's ready. Announce the disk to the world. */ | 4838 | /* Everything's ready. Announce the disk to the world. */ |
4684 | 4839 | ||
4685 | set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE); | ||
4686 | set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 4840 | set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
4687 | add_disk(rbd_dev->disk); | 4841 | add_disk(rbd_dev->disk); |
4688 | 4842 | ||
@@ -4691,6 +4845,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev) | |||
4691 | 4845 | ||
4692 | return ret; | 4846 | return ret; |
4693 | 4847 | ||
4848 | err_out_mapping: | ||
4849 | rbd_dev_mapping_clear(rbd_dev); | ||
4694 | err_out_disk: | 4850 | err_out_disk: |
4695 | rbd_free_disk(rbd_dev); | 4851 | rbd_free_disk(rbd_dev); |
4696 | err_out_blkdev: | 4852 | err_out_blkdev: |
@@ -4731,12 +4887,7 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev) | |||
4731 | 4887 | ||
4732 | static void rbd_dev_image_release(struct rbd_device *rbd_dev) | 4888 | static void rbd_dev_image_release(struct rbd_device *rbd_dev) |
4733 | { | 4889 | { |
4734 | int ret; | ||
4735 | |||
4736 | rbd_dev_unprobe(rbd_dev); | 4890 | rbd_dev_unprobe(rbd_dev); |
4737 | ret = rbd_dev_header_watch_sync(rbd_dev, 0); | ||
4738 | if (ret) | ||
4739 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | ||
4740 | kfree(rbd_dev->header_name); | 4891 | kfree(rbd_dev->header_name); |
4741 | rbd_dev->header_name = NULL; | 4892 | rbd_dev->header_name = NULL; |
4742 | rbd_dev->image_format = 0; | 4893 | rbd_dev->image_format = 0; |
@@ -4748,18 +4899,20 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) | |||
4748 | 4899 | ||
4749 | /* | 4900 | /* |
4750 | * Probe for the existence of the header object for the given rbd | 4901 | * Probe for the existence of the header object for the given rbd |
4751 | * device. For format 2 images this includes determining the image | 4902 | * device. If this image is the one being mapped (i.e., not a |
4752 | * id. | 4903 | * parent), initiate a watch on its header object before using that |
4904 | * object to get detailed information about the rbd image. | ||
4753 | */ | 4905 | */ |
4754 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | 4906 | static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) |
4755 | { | 4907 | { |
4756 | int ret; | 4908 | int ret; |
4757 | int tmp; | 4909 | int tmp; |
4758 | 4910 | ||
4759 | /* | 4911 | /* |
4760 | * Get the id from the image id object. If it's not a | 4912 | * Get the id from the image id object. Unless there's an |
4761 | * format 2 image, we'll get ENOENT back, and we'll assume | 4913 | * error, rbd_dev->spec->image_id will be filled in with |
4762 | * it's a format 1 image. | 4914 | * a dynamically-allocated string, and rbd_dev->image_format |
4915 | * will be set to either 1 or 2. | ||
4763 | */ | 4916 | */ |
4764 | ret = rbd_dev_image_id(rbd_dev); | 4917 | ret = rbd_dev_image_id(rbd_dev); |
4765 | if (ret) | 4918 | if (ret) |
@@ -4771,14 +4924,16 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | |||
4771 | if (ret) | 4924 | if (ret) |
4772 | goto err_out_format; | 4925 | goto err_out_format; |
4773 | 4926 | ||
4774 | ret = rbd_dev_header_watch_sync(rbd_dev, 1); | 4927 | if (mapping) { |
4775 | if (ret) | 4928 | ret = rbd_dev_header_watch_sync(rbd_dev, true); |
4776 | goto out_header_name; | 4929 | if (ret) |
4930 | goto out_header_name; | ||
4931 | } | ||
4777 | 4932 | ||
4778 | if (rbd_dev->image_format == 1) | 4933 | if (rbd_dev->image_format == 1) |
4779 | ret = rbd_dev_v1_probe(rbd_dev); | 4934 | ret = rbd_dev_v1_header_info(rbd_dev); |
4780 | else | 4935 | else |
4781 | ret = rbd_dev_v2_probe(rbd_dev); | 4936 | ret = rbd_dev_v2_header_info(rbd_dev); |
4782 | if (ret) | 4937 | if (ret) |
4783 | goto err_out_watch; | 4938 | goto err_out_watch; |
4784 | 4939 | ||
@@ -4787,15 +4942,22 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev) | |||
4787 | goto err_out_probe; | 4942 | goto err_out_probe; |
4788 | 4943 | ||
4789 | ret = rbd_dev_probe_parent(rbd_dev); | 4944 | ret = rbd_dev_probe_parent(rbd_dev); |
4790 | if (!ret) | 4945 | if (ret) |
4791 | return 0; | 4946 | goto err_out_probe; |
4947 | |||
4948 | dout("discovered format %u image, header name is %s\n", | ||
4949 | rbd_dev->image_format, rbd_dev->header_name); | ||
4792 | 4950 | ||
4951 | return 0; | ||
4793 | err_out_probe: | 4952 | err_out_probe: |
4794 | rbd_dev_unprobe(rbd_dev); | 4953 | rbd_dev_unprobe(rbd_dev); |
4795 | err_out_watch: | 4954 | err_out_watch: |
4796 | tmp = rbd_dev_header_watch_sync(rbd_dev, 0); | 4955 | if (mapping) { |
4797 | if (tmp) | 4956 | tmp = rbd_dev_header_watch_sync(rbd_dev, false); |
4798 | rbd_warn(rbd_dev, "unable to tear down watch request\n"); | 4957 | if (tmp) |
4958 | rbd_warn(rbd_dev, "unable to tear down " | ||
4959 | "watch request (%d)\n", tmp); | ||
4960 | } | ||
4799 | out_header_name: | 4961 | out_header_name: |
4800 | kfree(rbd_dev->header_name); | 4962 | kfree(rbd_dev->header_name); |
4801 | rbd_dev->header_name = NULL; | 4963 | rbd_dev->header_name = NULL; |
@@ -4819,6 +4981,7 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
4819 | struct rbd_spec *spec = NULL; | 4981 | struct rbd_spec *spec = NULL; |
4820 | struct rbd_client *rbdc; | 4982 | struct rbd_client *rbdc; |
4821 | struct ceph_osd_client *osdc; | 4983 | struct ceph_osd_client *osdc; |
4984 | bool read_only; | ||
4822 | int rc = -ENOMEM; | 4985 | int rc = -ENOMEM; |
4823 | 4986 | ||
4824 | if (!try_module_get(THIS_MODULE)) | 4987 | if (!try_module_get(THIS_MODULE)) |
@@ -4828,13 +4991,15 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
4828 | rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); | 4991 | rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); |
4829 | if (rc < 0) | 4992 | if (rc < 0) |
4830 | goto err_out_module; | 4993 | goto err_out_module; |
4994 | read_only = rbd_opts->read_only; | ||
4995 | kfree(rbd_opts); | ||
4996 | rbd_opts = NULL; /* done with this */ | ||
4831 | 4997 | ||
4832 | rbdc = rbd_get_client(ceph_opts); | 4998 | rbdc = rbd_get_client(ceph_opts); |
4833 | if (IS_ERR(rbdc)) { | 4999 | if (IS_ERR(rbdc)) { |
4834 | rc = PTR_ERR(rbdc); | 5000 | rc = PTR_ERR(rbdc); |
4835 | goto err_out_args; | 5001 | goto err_out_args; |
4836 | } | 5002 | } |
4837 | ceph_opts = NULL; /* rbd_dev client now owns this */ | ||
4838 | 5003 | ||
4839 | /* pick the pool */ | 5004 | /* pick the pool */ |
4840 | osdc = &rbdc->client->osdc; | 5005 | osdc = &rbdc->client->osdc; |
@@ -4858,27 +5023,29 @@ static ssize_t rbd_add(struct bus_type *bus, | |||
4858 | rbdc = NULL; /* rbd_dev now owns this */ | 5023 | rbdc = NULL; /* rbd_dev now owns this */ |
4859 | spec = NULL; /* rbd_dev now owns this */ | 5024 | spec = NULL; /* rbd_dev now owns this */ |
4860 | 5025 | ||
4861 | rbd_dev->mapping.read_only = rbd_opts->read_only; | 5026 | rc = rbd_dev_image_probe(rbd_dev, true); |
4862 | kfree(rbd_opts); | ||
4863 | rbd_opts = NULL; /* done with this */ | ||
4864 | |||
4865 | rc = rbd_dev_image_probe(rbd_dev); | ||
4866 | if (rc < 0) | 5027 | if (rc < 0) |
4867 | goto err_out_rbd_dev; | 5028 | goto err_out_rbd_dev; |
4868 | 5029 | ||
5030 | /* If we are mapping a snapshot it must be marked read-only */ | ||
5031 | |||
5032 | if (rbd_dev->spec->snap_id != CEPH_NOSNAP) | ||
5033 | read_only = true; | ||
5034 | rbd_dev->mapping.read_only = read_only; | ||
5035 | |||
4869 | rc = rbd_dev_device_setup(rbd_dev); | 5036 | rc = rbd_dev_device_setup(rbd_dev); |
4870 | if (!rc) | 5037 | if (rc) { |
4871 | return count; | 5038 | rbd_dev_image_release(rbd_dev); |
5039 | goto err_out_module; | ||
5040 | } | ||
5041 | |||
5042 | return count; | ||
4872 | 5043 | ||
4873 | rbd_dev_image_release(rbd_dev); | ||
4874 | err_out_rbd_dev: | 5044 | err_out_rbd_dev: |
4875 | rbd_dev_destroy(rbd_dev); | 5045 | rbd_dev_destroy(rbd_dev); |
4876 | err_out_client: | 5046 | err_out_client: |
4877 | rbd_put_client(rbdc); | 5047 | rbd_put_client(rbdc); |
4878 | err_out_args: | 5048 | err_out_args: |
4879 | if (ceph_opts) | ||
4880 | ceph_destroy_options(ceph_opts); | ||
4881 | kfree(rbd_opts); | ||
4882 | rbd_spec_put(spec); | 5049 | rbd_spec_put(spec); |
4883 | err_out_module: | 5050 | err_out_module: |
4884 | module_put(THIS_MODULE); | 5051 | module_put(THIS_MODULE); |
@@ -4911,7 +5078,7 @@ static void rbd_dev_device_release(struct device *dev) | |||
4911 | 5078 | ||
4912 | rbd_free_disk(rbd_dev); | 5079 | rbd_free_disk(rbd_dev); |
4913 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); | 5080 | clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); |
4914 | rbd_dev_clear_mapping(rbd_dev); | 5081 | rbd_dev_mapping_clear(rbd_dev); |
4915 | unregister_blkdev(rbd_dev->major, rbd_dev->name); | 5082 | unregister_blkdev(rbd_dev->major, rbd_dev->name); |
4916 | rbd_dev->major = 0; | 5083 | rbd_dev->major = 0; |
4917 | rbd_dev_id_put(rbd_dev); | 5084 | rbd_dev_id_put(rbd_dev); |
@@ -4978,10 +5145,13 @@ static ssize_t rbd_remove(struct bus_type *bus, | |||
4978 | spin_unlock_irq(&rbd_dev->lock); | 5145 | spin_unlock_irq(&rbd_dev->lock); |
4979 | if (ret < 0) | 5146 | if (ret < 0) |
4980 | goto done; | 5147 | goto done; |
4981 | ret = count; | ||
4982 | rbd_bus_del_dev(rbd_dev); | 5148 | rbd_bus_del_dev(rbd_dev); |
5149 | ret = rbd_dev_header_watch_sync(rbd_dev, false); | ||
5150 | if (ret) | ||
5151 | rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret); | ||
4983 | rbd_dev_image_release(rbd_dev); | 5152 | rbd_dev_image_release(rbd_dev); |
4984 | module_put(THIS_MODULE); | 5153 | module_put(THIS_MODULE); |
5154 | ret = count; | ||
4985 | done: | 5155 | done: |
4986 | mutex_unlock(&ctl_mutex); | 5156 | mutex_unlock(&ctl_mutex); |
4987 | 5157 | ||
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index f8ef15f37c5e..3fd130fdfbc1 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c | |||
@@ -1160,8 +1160,7 @@ static int ace_probe(struct platform_device *dev) | |||
1160 | dev_dbg(&dev->dev, "ace_probe(%p)\n", dev); | 1160 | dev_dbg(&dev->dev, "ace_probe(%p)\n", dev); |
1161 | 1161 | ||
1162 | /* device id and bus width */ | 1162 | /* device id and bus width */ |
1163 | of_property_read_u32(dev->dev.of_node, "port-number", &id); | 1163 | if (of_property_read_u32(dev->dev.of_node, "port-number", &id)) |
1164 | if (id < 0) | ||
1165 | id = 0; | 1164 | id = 0; |
1166 | if (of_find_property(dev->dev.of_node, "8-bit", NULL)) | 1165 | if (of_find_property(dev->dev.of_node, "8-bit", NULL)) |
1167 | bus_width = ACE_BUS_WIDTH_8; | 1166 | bus_width = ACE_BUS_WIDTH_8; |