author	Jens Axboe <axboe@kernel.dk>	2013-07-02 02:31:48 -0400
committer	Jens Axboe <axboe@kernel.dk>	2013-07-02 02:31:48 -0400
commit	5f0e5afa0de4522abb3ea7d1369039b94e740ec5 (patch)
tree	6a5be3db9ecfed8ef2150c6146f6d1e0d658ac8b /drivers/block
parent	d752b2696072ed52fd5afab08b601e2220a3b87e (diff)
parent	9e895ace5d82df8929b16f58e9f515f6d54ab82d (diff)
Merge tag 'v3.10-rc7' into for-3.11/drivers
Linux 3.10-rc7

Pull this in early to avoid doing it with the bcache merge, since there are a
number of changes to bcache between my old base (3.10-rc1) and the new pull
request.
Diffstat (limited to 'drivers/block')
-rw-r--r--	drivers/block/brd.c                |   4
-rw-r--r--	drivers/block/cciss.c              |  32
-rw-r--r--	drivers/block/mtip32xx/mtip32xx.c  |   8
-rw-r--r--	drivers/block/nvme-core.c          |  62
-rw-r--r--	drivers/block/nvme-scsi.c          |   3
-rw-r--r--	drivers/block/pktcdvd.c            |   3
-rw-r--r--	drivers/block/rbd.c                | 974
-rw-r--r--	drivers/block/xsysace.c            |   3
8 files changed, 647 insertions(+), 442 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index f1a29f8e9d33..9bf4371755f2 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -117,13 +117,13 @@ static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
 
 	spin_lock(&brd->brd_lock);
 	idx = sector >> PAGE_SECTORS_SHIFT;
+	page->index = idx;
 	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
 		__free_page(page);
 		page = radix_tree_lookup(&brd->brd_pages, idx);
 		BUG_ON(!page);
 		BUG_ON(page->index != idx);
-	} else
-		page->index = idx;
+	}
 	spin_unlock(&brd->brd_lock);
 
 	radix_tree_preload_end();
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 6374dc103521..62b6c2cc80b5 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -168,8 +168,6 @@ static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
 static int cciss_open(struct block_device *bdev, fmode_t mode);
 static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode);
 static void cciss_release(struct gendisk *disk, fmode_t mode);
-static int do_ioctl(struct block_device *bdev, fmode_t mode,
-		    unsigned int cmd, unsigned long arg);
 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 		       unsigned int cmd, unsigned long arg);
 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -235,7 +233,7 @@ static const struct block_device_operations cciss_fops = {
 	.owner = THIS_MODULE,
 	.open = cciss_unlocked_open,
 	.release = cciss_release,
-	.ioctl = do_ioctl,
+	.ioctl = cciss_ioctl,
 	.getgeo = cciss_getgeo,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = cciss_compat_ioctl,
@@ -1143,16 +1141,6 @@ static void cciss_release(struct gendisk *disk, fmode_t mode)
 	mutex_unlock(&cciss_mutex);
 }
 
-static int do_ioctl(struct block_device *bdev, fmode_t mode,
-		    unsigned cmd, unsigned long arg)
-{
-	int ret;
-	mutex_lock(&cciss_mutex);
-	ret = cciss_ioctl(bdev, mode, cmd, arg);
-	mutex_unlock(&cciss_mutex);
-	return ret;
-}
-
 #ifdef CONFIG_COMPAT
 
 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
@@ -1179,7 +1167,7 @@ static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
 	case CCISS_REGNEWD:
 	case CCISS_RESCANDISK:
 	case CCISS_GETLUNINFO:
-		return do_ioctl(bdev, mode, cmd, arg);
+		return cciss_ioctl(bdev, mode, cmd, arg);
 
 	case CCISS_PASSTHRU32:
 		return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
@@ -1219,7 +1207,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
 	if (err)
 		return -EFAULT;
 
-	err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
+	err = cciss_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
 	if (err)
 		return err;
 	err |=
@@ -1261,7 +1249,7 @@ static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
 	if (err)
 		return -EFAULT;
 
-	err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
+	err = cciss_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
 	if (err)
 		return err;
 	err |=
@@ -1311,11 +1299,14 @@ static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
 static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
 {
 	cciss_coalint_struct intinfo;
+	unsigned long flags;
 
 	if (!argp)
 		return -EINVAL;
+	spin_lock_irqsave(&h->lock, flags);
 	intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
 	intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (copy_to_user
 	    (argp, &intinfo, sizeof(cciss_coalint_struct)))
 		return -EFAULT;
@@ -1356,12 +1347,15 @@ static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
 static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
 {
 	NodeName_type NodeName;
+	unsigned long flags;
 	int i;
 
 	if (!argp)
 		return -EINVAL;
+	spin_lock_irqsave(&h->lock, flags);
 	for (i = 0; i < 16; i++)
 		NodeName[i] = readb(&h->cfgtable->ServerName[i]);
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
 		return -EFAULT;
 	return 0;
@@ -1398,10 +1392,13 @@ static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
 static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
 {
 	Heartbeat_type heartbeat;
+	unsigned long flags;
 
 	if (!argp)
 		return -EINVAL;
+	spin_lock_irqsave(&h->lock, flags);
 	heartbeat = readl(&h->cfgtable->HeartBeat);
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
 		return -EFAULT;
 	return 0;
@@ -1410,10 +1407,13 @@ static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
 static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
 {
 	BusTypes_type BusTypes;
+	unsigned long flags;
 
 	if (!argp)
 		return -EINVAL;
+	spin_lock_irqsave(&h->lock, flags);
 	BusTypes = readl(&h->cfgtable->BusTypes);
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
 		return -EFAULT;
 	return 0;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 847107ef0cce..20dd52a2f92f 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3002,7 +3002,8 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
 
 static void mtip_hw_debugfs_exit(struct driver_data *dd)
 {
-	debugfs_remove_recursive(dd->dfs_node);
+	if (dd->dfs_node)
+		debugfs_remove_recursive(dd->dfs_node);
 }
 
 
@@ -3863,7 +3864,7 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
 	struct driver_data *dd = queue->queuedata;
 	struct scatterlist *sg;
 	struct bio_vec *bvec;
-	int nents = 0;
+	int i, nents = 0;
 	int tag = 0, unaligned = 0;
 
 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
@@ -3921,11 +3922,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
 	}
 
 	/* Create the scatter list for this bio. */
-	bio_for_each_segment(bvec, bio, nents) {
+	bio_for_each_segment(bvec, bio, i) {
 		sg_set_page(&sg[nents],
 				bvec->bv_page,
 				bvec->bv_len,
 				bvec->bv_offset);
+		nents++;
 	}
 
 	/* Issue the read/write. */
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 8efdfaa44a59..ce79a590b45b 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -629,7 +629,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	struct nvme_command *cmnd;
 	struct nvme_iod *iod;
 	enum dma_data_direction dma_dir;
-	int cmdid, length, result = -ENOMEM;
+	int cmdid, length, result;
 	u16 control;
 	u32 dsmgmt;
 	int psegs = bio_phys_segments(ns->queue, bio);
@@ -640,6 +640,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 		return result;
 	}
 
+	result = -ENOMEM;
 	iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
 	if (!iod)
 		goto nomem;
@@ -977,6 +978,8 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
 
 		if (timeout && !time_after(now, info[cmdid].timeout))
 			continue;
+		if (info[cmdid].ctx == CMD_CTX_CANCELLED)
+			continue;
 		dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d\n", cmdid);
 		ctx = cancel_cmdid(nvmeq, cmdid, &fn);
 		fn(nvmeq->dev, ctx, &cqe);
@@ -1206,7 +1209,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 
 	if (addr & 3)
 		return ERR_PTR(-EINVAL);
-	if (!length)
+	if (!length || length > INT_MAX - PAGE_SIZE)
 		return ERR_PTR(-EINVAL);
 
 	offset = offset_in_page(addr);
@@ -1227,7 +1230,8 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	sg_init_table(sg, count);
 	for (i = 0; i < count; i++) {
 		sg_set_page(&sg[i], pages[i],
-				min_t(int, length, PAGE_SIZE - offset), offset);
+				min_t(unsigned, length, PAGE_SIZE - offset),
+				offset);
 		length -= (PAGE_SIZE - offset);
 		offset = 0;
 	}
@@ -1435,7 +1439,7 @@ static int nvme_user_admin_cmd(struct nvme_dev *dev,
 		nvme_free_iod(dev, iod);
 	}
 
-	if (!status && copy_to_user(&ucmd->result, &cmd.result,
+	if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
 							sizeof(cmd.result)))
 		status = -EFAULT;
 
@@ -1633,7 +1637,8 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
-	int result, cpu, i, nr_io_queues, db_bar_size, q_depth;
+	struct pci_dev *pdev = dev->pci_dev;
+	int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count;
 
 	nr_io_queues = num_online_cpus();
 	result = set_queue_count(dev, nr_io_queues);
@@ -1642,14 +1647,14 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	if (result < nr_io_queues)
 		nr_io_queues = result;
 
+	q_count = nr_io_queues;
 	/* Deregister the admin queue's interrupt */
 	free_irq(dev->entry[0].vector, dev->queues[0]);
 
 	db_bar_size = 4096 + ((nr_io_queues + 1) << (dev->db_stride + 3));
 	if (db_bar_size > 8192) {
 		iounmap(dev->bar);
-		dev->bar = ioremap(pci_resource_start(dev->pci_dev, 0),
-								db_bar_size);
+		dev->bar = ioremap(pci_resource_start(pdev, 0), db_bar_size);
 		dev->dbs = ((void __iomem *)dev->bar) + 4096;
 		dev->queues[0]->q_db = dev->dbs;
 	}
@@ -1657,19 +1662,36 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	for (i = 0; i < nr_io_queues; i++)
 		dev->entry[i].entry = i;
 	for (;;) {
-		result = pci_enable_msix(dev->pci_dev, dev->entry,
-								nr_io_queues);
+		result = pci_enable_msix(pdev, dev->entry, nr_io_queues);
 		if (result == 0) {
 			break;
 		} else if (result > 0) {
 			nr_io_queues = result;
 			continue;
 		} else {
-			nr_io_queues = 1;
+			nr_io_queues = 0;
 			break;
 		}
 	}
 
+	if (nr_io_queues == 0) {
+		nr_io_queues = q_count;
+		for (;;) {
+			result = pci_enable_msi_block(pdev, nr_io_queues);
+			if (result == 0) {
+				for (i = 0; i < nr_io_queues; i++)
+					dev->entry[i].vector = i + pdev->irq;
+				break;
+			} else if (result > 0) {
+				nr_io_queues = result;
+				continue;
+			} else {
+				nr_io_queues = 1;
+				break;
+			}
+		}
+	}
+
 	result = queue_request_irq(dev, dev->queues[0], "nvme admin");
 	/* XXX: handle failure here */
 
@@ -1850,7 +1872,10 @@ static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 	nvme_dev_remove(dev);
-	pci_disable_msix(dev->pci_dev);
+	if (dev->pci_dev->msi_enabled)
+		pci_disable_msi(dev->pci_dev);
+	else if (dev->pci_dev->msix_enabled)
+		pci_disable_msix(dev->pci_dev);
 	iounmap(dev->bar);
 	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
@@ -1923,8 +1948,14 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
-	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
-	dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+
+	if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+	else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)))
+		dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+	else
+		goto disable;
+
 	result = nvme_set_instance(dev);
 	if (result)
 		goto disable;
@@ -1977,7 +2008,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  unmap:
 	iounmap(dev->bar);
  disable_msix:
-	pci_disable_msix(pdev);
+	if (dev->pci_dev->msi_enabled)
+		pci_disable_msi(dev->pci_dev);
+	else if (dev->pci_dev->msix_enabled)
+		pci_disable_msix(dev->pci_dev);
 	nvme_release_instance(dev);
 	nvme_release_prp_pools(dev);
 disable:
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index fed54b039893..102de2f52b5c 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -44,7 +44,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/version.h>
 #include <scsi/sg.h>
 #include <scsi/scsi.h>
 
@@ -1654,7 +1653,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
 	}
 }
 
-static u16 nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				u8 *mode_page, u8 page_code)
 {
 	int res = SNTI_TRANSLATION_SUCCESS;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3c08983e600a..f5d0ea11d9fd 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -83,7 +83,8 @@
 
 #define MAX_SPEED 0xffff
 
-#define ZONE(sector, pd) (((sector) + (pd)->offset) & ~((pd)->settings.size - 1))
+#define ZONE(sector, pd) (((sector) + (pd)->offset) & \
+			~(sector_t)((pd)->settings.size - 1))
 
 static DEFINE_MUTEX(pktcdvd_mutex);
 static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ca63104136e0..49394e3f31bc 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -55,6 +55,39 @@
 #define SECTOR_SHIFT	9
 #define SECTOR_SIZE	(1ULL << SECTOR_SHIFT)
 
+/*
+ * Increment the given counter and return its updated value.
+ * If the counter is already 0 it will not be incremented.
+ * If the counter is already at its maximum value returns
+ * -EINVAL without updating it.
+ */
+static int atomic_inc_return_safe(atomic_t *v)
+{
+	unsigned int counter;
+
+	counter = (unsigned int)__atomic_add_unless(v, 1, 0);
+	if (counter <= (unsigned int)INT_MAX)
+		return (int)counter;
+
+	atomic_dec(v);
+
+	return -EINVAL;
+}
+
+/* Decrement the counter.  Return the resulting value, or -EINVAL */
+static int atomic_dec_return_safe(atomic_t *v)
+{
+	int counter;
+
+	counter = atomic_dec_return(v);
+	if (counter >= 0)
+		return counter;
+
+	atomic_inc(v);
+
+	return -EINVAL;
+}
+
 #define RBD_DRV_NAME "rbd"
 #define RBD_DRV_NAME_LONG "rbd (rados block device)"
 
@@ -100,21 +133,20 @@
  * block device image metadata (in-memory version)
  */
 struct rbd_image_header {
-	/* These four fields never change for a given rbd image */
+	/* These six fields never change for a given rbd image */
 	char *object_prefix;
-	u64 features;
 	__u8 obj_order;
 	__u8 crypt_type;
 	__u8 comp_type;
+	u64 stripe_unit;
+	u64 stripe_count;
+	u64 features;		/* Might be changeable someday? */
 
 	/* The remaining fields need to be updated occasionally */
 	u64 image_size;
 	struct ceph_snap_context *snapc;
-	char *snap_names;
-	u64 *snap_sizes;
-
-	u64 stripe_unit;
-	u64 stripe_count;
+	char *snap_names;	/* format 1 only */
+	u64 *snap_sizes;	/* format 1 only */
 };
 
 /*
@@ -225,6 +257,7 @@ struct rbd_obj_request {
 		};
 	};
 	struct page **copyup_pages;
+	u32 copyup_page_count;
 
 	struct ceph_osd_request *osd_req;
 
@@ -257,6 +290,7 @@ struct rbd_img_request {
 		struct rbd_obj_request *obj_request;	/* obj req initiator */
 	};
 	struct page **copyup_pages;
+	u32 copyup_page_count;
 	spinlock_t completion_lock;/* protects next_completion */
 	u32 next_completion;
 	rbd_img_callback_t callback;
@@ -311,6 +345,7 @@ struct rbd_device {
 
 	struct rbd_spec *parent_spec;
 	u64 parent_overlap;
+	atomic_t parent_ref;
 	struct rbd_device *parent;
 
 	/* protects updating the header */
@@ -359,7 +394,8 @@ static ssize_t rbd_add(struct bus_type *bus, const char *buf,
 		       size_t count);
 static ssize_t rbd_remove(struct bus_type *bus, const char *buf,
 			  size_t count);
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev);
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
+static void rbd_spec_put(struct rbd_spec *spec);
 
 static struct bus_attribute rbd_bus_attrs[] = {
 	__ATTR(add, S_IWUSR, NULL, rbd_add),
@@ -426,7 +462,8 @@ static void rbd_img_parent_read(struct rbd_obj_request *obj_request);
 static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);
 
 static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev);
 static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
 					u64 snap_id);
 static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -482,8 +519,8 @@ static const struct block_device_operations rbd_bd_ops = {
 };
 
 /*
- * Initialize an rbd client instance.
- * We own *ceph_opts.
+ * Initialize an rbd client instance.  Success or not, this function
+ * consumes ceph_opts.
  */
 static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts)
 {
@@ -638,7 +675,8 @@ static int parse_rbd_opts_token(char *c, void *private)
 
 /*
  * Get a ceph client with specific addr and configuration, if one does
- * not exist create it.
+ * not exist create it.  Either way, ceph_opts is consumed by this
+ * function.
  */
 static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
 {
@@ -726,88 +764,123 @@ static bool rbd_dev_ondisk_valid(struct rbd_image_header_ondisk *ondisk)
 }
 
 /*
- * Create a new header structure, translate header format from the on-disk
- * header.
+ * Fill an rbd image header with information from the given format 1
+ * on-disk header.
  */
-static int rbd_header_from_disk(struct rbd_image_header *header,
+static int rbd_header_from_disk(struct rbd_device *rbd_dev,
 				 struct rbd_image_header_ondisk *ondisk)
 {
+	struct rbd_image_header *header = &rbd_dev->header;
+	bool first_time = header->object_prefix == NULL;
+	struct ceph_snap_context *snapc;
+	char *object_prefix = NULL;
+	char *snap_names = NULL;
+	u64 *snap_sizes = NULL;
 	u32 snap_count;
-	size_t len;
 	size_t size;
+	int ret = -ENOMEM;
 	u32 i;
 
-	memset(header, 0, sizeof (*header));
+	/* Allocate this now to avoid having to handle failure below */
 
-	snap_count = le32_to_cpu(ondisk->snap_count);
+	if (first_time) {
+		size_t len;
 
-	len = strnlen(ondisk->object_prefix, sizeof (ondisk->object_prefix));
-	header->object_prefix = kmalloc(len + 1, GFP_KERNEL);
-	if (!header->object_prefix)
-		return -ENOMEM;
-	memcpy(header->object_prefix, ondisk->object_prefix, len);
-	header->object_prefix[len] = '\0';
+		len = strnlen(ondisk->object_prefix,
+				sizeof (ondisk->object_prefix));
+		object_prefix = kmalloc(len + 1, GFP_KERNEL);
+		if (!object_prefix)
+			return -ENOMEM;
+		memcpy(object_prefix, ondisk->object_prefix, len);
+		object_prefix[len] = '\0';
+	}
+
+	/* Allocate the snapshot context and fill it in */
 
+	snap_count = le32_to_cpu(ondisk->snap_count);
+	snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
+	if (!snapc)
+		goto out_err;
+	snapc->seq = le64_to_cpu(ondisk->snap_seq);
 	if (snap_count) {
+		struct rbd_image_snap_ondisk *snaps;
 		u64 snap_names_len = le64_to_cpu(ondisk->snap_names_len);
 
-		/* Save a copy of the snapshot names */
+		/* We'll keep a copy of the snapshot names... */
 
-		if (snap_names_len > (u64) SIZE_MAX)
-			return -EIO;
-		header->snap_names = kmalloc(snap_names_len, GFP_KERNEL);
-		if (!header->snap_names)
+		if (snap_names_len > (u64)SIZE_MAX)
+			goto out_2big;
+		snap_names = kmalloc(snap_names_len, GFP_KERNEL);
+		if (!snap_names)
+			goto out_err;
+
+		/* ...as well as the array of their sizes. */
+
+		size = snap_count * sizeof (*header->snap_sizes);
+		snap_sizes = kmalloc(size, GFP_KERNEL);
+		if (!snap_sizes)
 			goto out_err;
 
 		/*
-		 * Note that rbd_dev_v1_header_read() guarantees
-		 * the ondisk buffer we're working with has
+		 * Copy the names, and fill in each snapshot's id
+		 * and size.
+		 *
+		 * Note that rbd_dev_v1_header_info() guarantees the
+		 * ondisk buffer we're working with has
 		 * snap_names_len bytes beyond the end of the
 		 * snapshot id array, this memcpy() is safe.
 		 */
-		memcpy(header->snap_names, &ondisk->snaps[snap_count],
-			snap_names_len);
+		memcpy(snap_names, &ondisk->snaps[snap_count], snap_names_len);
+		snaps = ondisk->snaps;
+		for (i = 0; i < snap_count; i++) {
+			snapc->snaps[i] = le64_to_cpu(snaps[i].id);
+			snap_sizes[i] = le64_to_cpu(snaps[i].image_size);
+		}
+	}
 
-		/* Record each snapshot's size */
+	/* We won't fail any more, fill in the header */
 
-		size = snap_count * sizeof (*header->snap_sizes);
-		header->snap_sizes = kmalloc(size, GFP_KERNEL);
-		if (!header->snap_sizes)
-			goto out_err;
-		for (i = 0; i < snap_count; i++)
-			header->snap_sizes[i] =
-				le64_to_cpu(ondisk->snaps[i].image_size);
+	down_write(&rbd_dev->header_rwsem);
+	if (first_time) {
+		header->object_prefix = object_prefix;
+		header->obj_order = ondisk->options.order;
+		header->crypt_type = ondisk->options.crypt_type;
+		header->comp_type = ondisk->options.comp_type;
+		/* The rest aren't used for format 1 images */
+		header->stripe_unit = 0;
+		header->stripe_count = 0;
+		header->features = 0;
 	} else {
-		header->snap_names = NULL;
-		header->snap_sizes = NULL;
+		ceph_put_snap_context(header->snapc);
+		kfree(header->snap_names);
+		kfree(header->snap_sizes);
 	}
 
-	header->features = 0;	/* No features support in v1 images */
-	header->obj_order = ondisk->options.order;
-	header->crypt_type = ondisk->options.crypt_type;
-	header->comp_type = ondisk->options.comp_type;
-
-	/* Allocate and fill in the snapshot context */
+	/* The remaining fields always get updated (when we refresh) */
 
 	header->image_size = le64_to_cpu(ondisk->image_size);
+	header->snapc = snapc;
+	header->snap_names = snap_names;
+	header->snap_sizes = snap_sizes;
 
-	header->snapc = ceph_create_snap_context(snap_count, GFP_KERNEL);
-	if (!header->snapc)
-		goto out_err;
-	header->snapc->seq = le64_to_cpu(ondisk->snap_seq);
-	for (i = 0; i < snap_count; i++)
-		header->snapc->snaps[i] = le64_to_cpu(ondisk->snaps[i].id);
+	/* Make sure mapping size is consistent with header info */
 
-	return 0;
+	if (rbd_dev->spec->snap_id == CEPH_NOSNAP || first_time)
+		if (rbd_dev->mapping.size != header->image_size)
+			rbd_dev->mapping.size = header->image_size;
 
+	up_write(&rbd_dev->header_rwsem);
+
+	return 0;
+out_2big:
+	ret = -EIO;
 out_err:
-	kfree(header->snap_sizes);
-	header->snap_sizes = NULL;
-	kfree(header->snap_names);
-	header->snap_names = NULL;
-	kfree(header->object_prefix);
-	header->object_prefix = NULL;
+	kfree(snap_sizes);
+	kfree(snap_names);
+	ceph_put_snap_context(snapc);
+	kfree(object_prefix);
 
-	return -ENOMEM;
+	return ret;
 }
 
 static const char *_rbd_dev_v1_snap_name(struct rbd_device *rbd_dev, u32 which)
@@ -934,20 +1007,11 @@ static int rbd_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
 
 static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
 {
-	const char *snap_name = rbd_dev->spec->snap_name;
-	u64 snap_id;
+	u64 snap_id = rbd_dev->spec->snap_id;
 	u64 size = 0;
 	u64 features = 0;
 	int ret;
 
-	if (strcmp(snap_name, RBD_SNAP_HEAD_NAME)) {
-		snap_id = rbd_snap_id_by_name(rbd_dev, snap_name);
-		if (snap_id == CEPH_NOSNAP)
-			return -ENOENT;
-	} else {
-		snap_id = CEPH_NOSNAP;
-	}
-
 	ret = rbd_snap_size(rbd_dev, snap_id, &size);
 	if (ret)
 		return ret;
@@ -958,11 +1022,6 @@ static int rbd_dev_mapping_set(struct rbd_device *rbd_dev)
 	rbd_dev->mapping.size = size;
 	rbd_dev->mapping.features = features;
 
-	/* If we are mapping a snapshot it must be marked read-only */
-
-	if (snap_id != CEPH_NOSNAP)
-		rbd_dev->mapping.read_only = true;
-
 	return 0;
 }
 
@@ -970,14 +1029,6 @@ static void rbd_dev_mapping_clear(struct rbd_device *rbd_dev)
 {
 	rbd_dev->mapping.size = 0;
 	rbd_dev->mapping.features = 0;
-	rbd_dev->mapping.read_only = true;
-}
-
-static void rbd_dev_clear_mapping(struct rbd_device *rbd_dev)
-{
-	rbd_dev->mapping.size = 0;
-	rbd_dev->mapping.features = 0;
-	rbd_dev->mapping.read_only = true;
 }
 
 static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
@@ -985,12 +1036,16 @@ static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset)
 	char *name;
 	u64 segment;
 	int ret;
+	char *name_format;
 
 	name = kmem_cache_alloc(rbd_segment_name_cache, GFP_NOIO);
 	if (!name)
 		return NULL;
 	segment = offset >> rbd_dev->header.obj_order;
-	ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, "%s.%012llx",
+	name_format = "%s.%012llx";
+	if (rbd_dev->image_format == 2)
+		name_format = "%s.%016llx";
+	ret = snprintf(name, MAX_OBJ_NAME_SIZE + 1, name_format,
 			rbd_dev->header.object_prefix, segment);
 	if (ret < 0 || ret > MAX_OBJ_NAME_SIZE) {
 		pr_err("error formatting segment name for #%llu (%d)\n",
@@ -1342,20 +1397,18 @@ static void rbd_obj_request_put(struct rbd_obj_request *obj_request)
 	kref_put(&obj_request->kref, rbd_obj_request_destroy);
 }
 
-static void rbd_img_request_get(struct rbd_img_request *img_request)
-{
-	dout("%s: img %p (was %d)\n", __func__, img_request,
-		atomic_read(&img_request->kref.refcount));
-	kref_get(&img_request->kref);
-}
-
+static bool img_request_child_test(struct rbd_img_request *img_request);
+static void rbd_parent_request_destroy(struct kref *kref);
 static void rbd_img_request_destroy(struct kref *kref);
 static void rbd_img_request_put(struct rbd_img_request *img_request)
 {
 	rbd_assert(img_request != NULL);
 	dout("%s: img %p (was %d)\n", __func__, img_request,
 		atomic_read(&img_request->kref.refcount));
-	kref_put(&img_request->kref, rbd_img_request_destroy);
+	if (img_request_child_test(img_request))
+		kref_put(&img_request->kref, rbd_parent_request_destroy);
+	else
+		kref_put(&img_request->kref, rbd_img_request_destroy);
 }
 
 static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request,
@@ -1472,6 +1525,12 @@ static void img_request_child_set(struct rbd_img_request *img_request)
 	smp_mb();
 }
 
+static void img_request_child_clear(struct rbd_img_request *img_request)
+{
+	clear_bit(IMG_REQ_CHILD, &img_request->flags);
+	smp_mb();
+}
+
 static bool img_request_child_test(struct rbd_img_request *img_request)
 {
 	smp_mb();
@@ -1484,6 +1543,12 @@ static void img_request_layered_set(struct rbd_img_request *img_request)
 	smp_mb();
 }
 
+static void img_request_layered_clear(struct rbd_img_request *img_request)
+{
+	clear_bit(IMG_REQ_LAYERED, &img_request->flags);
+	smp_mb();
+}
+
 static bool img_request_layered_test(struct rbd_img_request *img_request)
 {
 	smp_mb();
@@ -1827,6 +1892,74 @@ static void rbd_obj_request_destroy(struct kref *kref)
 	kmem_cache_free(rbd_obj_request_cache, obj_request);
 }
 
+/* It's OK to call this for a device with no parent */
+
+static void rbd_spec_put(struct rbd_spec *spec);
+static void rbd_dev_unparent(struct rbd_device *rbd_dev)
+{
+	rbd_dev_remove_parent(rbd_dev);
+	rbd_spec_put(rbd_dev->parent_spec);
+	rbd_dev->parent_spec = NULL;
+	rbd_dev->parent_overlap = 0;
+}
+
+/*
+ * Parent image reference counting is used to determine when an
+ * image's parent fields can be safely torn down--after there are no
+ * more in-flight requests to the parent image.  When the last
+ * reference is dropped, cleaning them up is safe.
+ */
+static void rbd_dev_parent_put(struct rbd_device *rbd_dev)
+{
+	int counter;
+
+	if (!rbd_dev->parent_spec)
+		return;
+
+	counter = atomic_dec_return_safe(&rbd_dev->parent_ref);
+	if (counter > 0)
+		return;
+
+	/* Last reference; clean up parent data structures */
+
+	if (!counter)
+		rbd_dev_unparent(rbd_dev);
+	else
+		rbd_warn(rbd_dev, "parent reference underflow\n");
+}
+
+/*
+ * If an image has a non-zero parent overlap, get a reference to its
+ * parent.
+ *
+ * We must get the reference before checking for the overlap to
+ * coordinate properly with zeroing the parent overlap in
+ * rbd_dev_v2_parent_info() when an image gets flattened.  We
+ * drop it again if there is no overlap.
+ *
+ * Returns true if the rbd device has a parent with a non-zero
+ * overlap and a reference for it was successfully taken, or
+ * false otherwise.
+ */
+static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
+{
+	int counter;
+
+	if (!rbd_dev->parent_spec)
+		return false;
+
+	counter = atomic_inc_return_safe(&rbd_dev->parent_ref);
+	if (counter > 0 && rbd_dev->parent_overlap)
+		return true;
+
+	/* Image was flattened, but parent is not yet torn down */
+
+	if (counter < 0)
+		rbd_warn(rbd_dev, "parent reference overflow\n");
+
+	return false;
+}
+
 /*
  * Caller is responsible for filling in the list of object requests
  * that comprises the image request, and the Linux request pointer
@@ -1835,8 +1968,7 @@ static void rbd_obj_request_destroy(struct kref *kref)
 static struct rbd_img_request *rbd_img_request_create(
 					struct rbd_device *rbd_dev,
 					u64 offset, u64 length,
-					bool write_request,
-					bool child_request)
+					bool write_request)
 {
 	struct rbd_img_request *img_request;
 
@@ -1861,9 +1993,7 @@ static struct rbd_img_request *rbd_img_request_create(
 	} else {
 		img_request->snap_id = rbd_dev->spec->snap_id;
 	}
-	if (child_request)
-		img_request_child_set(img_request);
-	if (rbd_dev->parent_spec)
+	if (rbd_dev_parent_get(rbd_dev))
 		img_request_layered_set(img_request);
 	spin_lock_init(&img_request->completion_lock);
 	img_request->next_completion = 0;
@@ -1873,9 +2003,6 @@ static struct rbd_img_request *rbd_img_request_create(
 	INIT_LIST_HEAD(&img_request->obj_requests);
 	kref_init(&img_request->kref);
 
-	rbd_img_request_get(img_request);	/* Avoid a warning */
-	rbd_img_request_put(img_request);	/* TEMPORARY */
-
 	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
 		write_request ? "write" : "read", offset, length,
 		img_request);
@@ -1897,15 +2024,54 @@ static void rbd_img_request_destroy(struct kref *kref)
 		rbd_img_obj_request_del(img_request, obj_request);
 	rbd_assert(img_request->obj_request_count == 0);
 
+	if (img_request_layered_test(img_request)) {
+		img_request_layered_clear(img_request);
+		rbd_dev_parent_put(img_request->rbd_dev);
+	}
+
 	if (img_request_write_test(img_request))
 		ceph_put_snap_context(img_request->snapc);
 
-	if (img_request_child_test(img_request))
-		rbd_obj_request_put(img_request->obj_request);
-
 	kmem_cache_free(rbd_img_request_cache, img_request);
 }
 
+static struct rbd_img_request *rbd_parent_request_create(
+					struct rbd_obj_request *obj_request,
+					u64 img_offset, u64 length)
+{
+	struct rbd_img_request *parent_request;
+	struct rbd_device *rbd_dev;
+
+	rbd_assert(obj_request->img_request);
+	rbd_dev = obj_request->img_request->rbd_dev;
+
+	parent_request = rbd_img_request_create(rbd_dev->parent,
+						img_offset, length, false);
+	if (!parent_request)
+		return NULL;
+
+	img_request_child_set(parent_request);
+	rbd_obj_request_get(obj_request);
+	parent_request->obj_request = obj_request;
+
+	return parent_request;
+}
+
+static void rbd_parent_request_destroy(struct kref *kref)
+{
+	struct rbd_img_request *parent_request;
+	struct rbd_obj_request *orig_request;
+
+	parent_request = container_of(kref, struct rbd_img_request, kref);
+	orig_request = parent_request->obj_request;
+
+	parent_request->obj_request = NULL;
+	rbd_obj_request_put(orig_request);
+	img_request_child_clear(parent_request);
+
+	rbd_img_request_destroy(kref);
+}
+
 static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 {
 	struct rbd_img_request *img_request;
@@ -2114,7 +2280,7 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
 {
 	struct rbd_img_request *img_request;
 	struct rbd_device *rbd_dev;
-	u64 length;
+	struct page **pages;
 	u32 page_count;
 
 	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
@@ -2124,12 +2290,14 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
 
 	rbd_dev = img_request->rbd_dev;
 	rbd_assert(rbd_dev);
-	length = (u64)1 << rbd_dev->header.obj_order;
-	page_count = (u32)calc_pages_for(0, length);
 
-	rbd_assert(obj_request->copyup_pages);
-	ceph_release_page_vector(obj_request->copyup_pages, page_count);
+	pages = obj_request->copyup_pages;
+	rbd_assert(pages != NULL);
 	obj_request->copyup_pages = NULL;
+	page_count = obj_request->copyup_page_count;
+	rbd_assert(page_count);
+	obj_request->copyup_page_count = 0;
+	ceph_release_page_vector(pages, page_count);
 
 	/*
 	 * We want the transfer count to reflect the size of the
@@ -2153,9 +2321,11 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	struct ceph_osd_client *osdc;
 	struct rbd_device *rbd_dev;
 	struct page **pages;
-	int result;
-	u64 obj_size;
-	u64 xferred;
+	u32 page_count;
+	int img_result;
+	u64 parent_length;
+	u64 offset;
+	u64 length;
 
 	rbd_assert(img_request_child_test(img_request));
 
@@ -2164,46 +2334,74 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	pages = img_request->copyup_pages;
 	rbd_assert(pages != NULL);
 	img_request->copyup_pages = NULL;
+	page_count = img_request->copyup_page_count;
+	rbd_assert(page_count);
+	img_request->copyup_page_count = 0;
 
 	orig_request = img_request->obj_request;
 	rbd_assert(orig_request != NULL);
-	rbd_assert(orig_request->type == OBJ_REQUEST_BIO);
-	result = img_request->result;
-	obj_size = img_request->length;
-	xferred = img_request->xferred;
+	rbd_assert(obj_request_type_valid(orig_request->type));
+	img_result = img_request->result;
+	parent_length = img_request->length;
+	rbd_assert(parent_length == img_request->xferred);
+	rbd_img_request_put(img_request);
 
-	rbd_dev = img_request->rbd_dev;
+	rbd_assert(orig_request->img_request);
+	rbd_dev = orig_request->img_request->rbd_dev;
 	rbd_assert(rbd_dev);
-	rbd_assert(obj_size == (u64)1 << rbd_dev->header.obj_order);
 
-	rbd_img_request_put(img_request);
+	/*
+	 * If the overlap has become 0 (most likely because the
+	 * image has been flattened) we need to free the pages
+	 * and re-submit the original write request.
+	 */
+	if (!rbd_dev->parent_overlap) {
+		struct ceph_osd_client *osdc;
 
-	if (result)
-		goto out_err;
+		ceph_release_page_vector(pages, page_count);
+		osdc = &rbd_dev->rbd_client->client->osdc;
+		img_result = rbd_obj_request_submit(osdc, orig_request);
+		if (!img_result)
+			return;
+	}
 
-	/* Allocate the new copyup osd request for the original request */
+	if (img_result)
+		goto out_err;
 
-	result = -ENOMEM;
-	rbd_assert(!orig_request->osd_req);
+	/*
+	 * The original osd request is of no use to use any more.
+	 * We need a new one that can hold the two ops in a copyup
+	 * request.  Allocate the new copyup osd request for the
+	 * original request, and release the old one.
+	 */
+	img_result = -ENOMEM;
 	osd_req = rbd_osd_req_create_copyup(orig_request);
 	if (!osd_req)
 		goto out_err;
+	rbd_osd_req_destroy(orig_request->osd_req);
 	orig_request->osd_req = osd_req;
 	orig_request->copyup_pages = pages;
+	orig_request->copyup_page_count = page_count;
 
 	/* Initialize the copyup op */
 
 	osd_req_op_cls_init(osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup");
-	osd_req_op_cls_request_data_pages(osd_req, 0, pages, obj_size, 0,
+	osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
 						false, false);
 
 	/* Then the original write request op */
 
+	offset = orig_request->offset;
+	length = orig_request->length;
 	osd_req_op_extent_init(osd_req, 1, CEPH_OSD_OP_WRITE,
-					orig_request->offset,
-					orig_request->length, 0, 0);
-	osd_req_op_extent_osd_data_bio(osd_req, 1, orig_request->bio_list,
-					orig_request->length);
+					offset, length, 0, 0);
+	if (orig_request->type == OBJ_REQUEST_BIO)
+		osd_req_op_extent_osd_data_bio(osd_req, 1,
+					orig_request->bio_list, length);
+	else
+		osd_req_op_extent_osd_data_pages(osd_req, 1,
+					orig_request->pages, length,
+					offset & ~PAGE_MASK, false, false);
 
 	rbd_osd_req_format_write(orig_request);
 
@@ -2211,13 +2409,13 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 
 	orig_request->callback = rbd_img_obj_copyup_callback;
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	result = rbd_obj_request_submit(osdc, orig_request);
-	if (!result)
+	img_result = rbd_obj_request_submit(osdc, orig_request);
+	if (!img_result)
 		return;
 out_err:
 	/* Record the error code and complete the request */
 
-	orig_request->result = result;
+	orig_request->result = img_result;
 	orig_request->xferred = 0;
 	obj_request_done_set(orig_request);
 	rbd_obj_request_complete(orig_request);
@@ -2249,7 +2447,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 	int result;
 
 	rbd_assert(obj_request_img_data_test(obj_request));
-	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+	rbd_assert(obj_request_type_valid(obj_request->type));
 
 	img_request = obj_request->img_request;
 	rbd_assert(img_request != NULL);
@@ -2257,15 +2455,6 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 	rbd_assert(rbd_dev->parent != NULL);
 
 	/*
-	 * First things first.  The original osd request is of no
-	 * use to use any more, we'll need a new one that can hold
-	 * the two ops in a copyup request.  We'll get that later,
-	 * but for now we can release the old one.
-	 */
-	rbd_osd_req_destroy(obj_request->osd_req);
-	obj_request->osd_req = NULL;
-
-	/*
 	 * Determine the byte range covered by the object in the
 	 * child image to which the original request was to be sent.
 	 */
@@ -2295,18 +2484,16 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 	}
 
 	result = -ENOMEM;
-	parent_request = rbd_img_request_create(rbd_dev->parent,
-						img_offset, length,
-						false, true);
+	parent_request = rbd_parent_request_create(obj_request,
+						img_offset, length);
 	if (!parent_request)
 		goto out_err;
-	rbd_obj_request_get(obj_request);
-	parent_request->obj_request = obj_request;
 
 	result = rbd_img_request_fill(parent_request, OBJ_REQUEST_PAGES, pages);
 	if (result)
 		goto out_err;
 	parent_request->copyup_pages = pages;
+	parent_request->copyup_page_count = page_count;
 
 	parent_request->callback = rbd_img_obj_parent_read_full_callback;
 	result = rbd_img_request_submit(parent_request);
@@ -2314,6 +2501,7 @@ static int rbd_img_obj_parent_read_full(struct rbd_obj_request *obj_request)
 		return 0;
 
 	parent_request->copyup_pages = NULL;
+	parent_request->copyup_page_count = 0;
 	parent_request->obj_request = NULL;
 	rbd_obj_request_put(obj_request);
 out_err:
@@ -2331,6 +2519,7 @@ out_err:
 static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
 {
 	struct rbd_obj_request *orig_request;
+	struct rbd_device *rbd_dev;
 	int result;
 
 	rbd_assert(!obj_request_img_data_test(obj_request));
@@ -2353,8 +2542,21 @@ static void rbd_img_obj_exists_callback(struct rbd_obj_request *obj_request)
 		obj_request->xferred, obj_request->length);
 	rbd_obj_request_put(obj_request);
 
-	rbd_assert(orig_request);
-	rbd_assert(orig_request->img_request);
+	/*
+	 * If the overlap has become 0 (most likely because the
+	 * image has been flattened) we need to free the pages
+	 * and re-submit the original write request.
+	 */
+	rbd_dev = orig_request->img_request->rbd_dev;
+	if (!rbd_dev->parent_overlap) {
+		struct ceph_osd_client *osdc;
+
+		rbd_obj_request_put(orig_request);
+		osdc = &rbd_dev->rbd_client->client->osdc;
+		result = rbd_obj_request_submit(osdc, orig_request);
+		if (!result)
+			return;
+	}
 
 	/*
 	 * Our only purpose here is to determine whether the object
@@ -2512,14 +2714,36 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
 	struct rbd_obj_request *obj_request;
 	struct rbd_device *rbd_dev;
 	u64 obj_end;
+	u64 img_xferred;
+	int img_result;
 
 	rbd_assert(img_request_child_test(img_request));
 
+	/* First get what we need from the image request and release it */
+
 	obj_request = img_request->obj_request;
+	img_xferred = img_request->xferred;
+	img_result = img_request->result;
+	rbd_img_request_put(img_request);
+
+	/*
+	 * If the overlap has become 0 (most likely because the
+	 * image has been flattened) we need to re-submit the
+	 * original request.
+	 */
 	rbd_assert(obj_request);
 	rbd_assert(obj_request->img_request);
+	rbd_dev = obj_request->img_request->rbd_dev;
+	if (!rbd_dev->parent_overlap) {
+		struct ceph_osd_client *osdc;
 
-	obj_request->result = img_request->result;
+		osdc = &rbd_dev->rbd_client->client->osdc;
+		img_result = rbd_obj_request_submit(osdc, obj_request);
+		if (!img_result)
+			return;
+	}
+
+	obj_request->result = img_result;
 	if (obj_request->result)
 		goto out;
 
@@ -2532,7 +2756,6 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
2532 */ 2756 */
2533 rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length); 2757 rbd_assert(obj_request->img_offset < U64_MAX - obj_request->length);
2534 obj_end = obj_request->img_offset + obj_request->length; 2758 obj_end = obj_request->img_offset + obj_request->length;
2535 rbd_dev = obj_request->img_request->rbd_dev;
2536 if (obj_end > rbd_dev->parent_overlap) { 2759 if (obj_end > rbd_dev->parent_overlap) {
2537 u64 xferred = 0; 2760 u64 xferred = 0;
2538 2761
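
The callback now copies xferred and result out of the image request and drops its reference immediately, so nothing later in the function can touch a request whose last reference may just have gone away. A toy refcount model of that copy-then-put ordering; the struct and helpers are stand-ins, not the driver's API:

    #include <stdlib.h>

    struct img_req {
        int ref;
        unsigned long long xferred;
        int result;
    };

    static void img_put(struct img_req *r)
    {
        if (--r->ref == 0)
            free(r);
    }

    static int parent_read_cb(struct img_req *r, unsigned long long *xferred)
    {
        unsigned long long x = r->xferred;    /* copy out first... */
        int res = r->result;

        img_put(r);                           /* ...then release */
        *xferred = x;
        return res;
    }

    int main(void)
    {
        struct img_req *r = calloc(1, sizeof(*r));
        unsigned long long x;

        if (!r)
            return 1;
        r->ref = 1;
        r->xferred = 4096;
        return parent_read_cb(r, &x) ? 1 : 0;
    }
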
@@ -2540,43 +2763,39 @@ static void rbd_img_parent_read_callback(struct rbd_img_request *img_request)
2540 xferred = rbd_dev->parent_overlap - 2763 xferred = rbd_dev->parent_overlap -
2541 obj_request->img_offset; 2764 obj_request->img_offset;
2542 2765
2543 obj_request->xferred = min(img_request->xferred, xferred); 2766 obj_request->xferred = min(img_xferred, xferred);
2544 } else { 2767 } else {
2545 obj_request->xferred = img_request->xferred; 2768 obj_request->xferred = img_xferred;
2546 } 2769 }
2547out: 2770out:
2548 rbd_img_request_put(img_request);
2549 rbd_img_obj_request_read_callback(obj_request); 2771 rbd_img_obj_request_read_callback(obj_request);
2550 rbd_obj_request_complete(obj_request); 2772 rbd_obj_request_complete(obj_request);
2551} 2773}
2552 2774
2553static void rbd_img_parent_read(struct rbd_obj_request *obj_request) 2775static void rbd_img_parent_read(struct rbd_obj_request *obj_request)
2554{ 2776{
2555 struct rbd_device *rbd_dev;
2556 struct rbd_img_request *img_request; 2777 struct rbd_img_request *img_request;
2557 int result; 2778 int result;
2558 2779
2559 rbd_assert(obj_request_img_data_test(obj_request)); 2780 rbd_assert(obj_request_img_data_test(obj_request));
2560 rbd_assert(obj_request->img_request != NULL); 2781 rbd_assert(obj_request->img_request != NULL);
2561 rbd_assert(obj_request->result == (s32) -ENOENT); 2782 rbd_assert(obj_request->result == (s32) -ENOENT);
2562 rbd_assert(obj_request->type == OBJ_REQUEST_BIO); 2783 rbd_assert(obj_request_type_valid(obj_request->type));
2563 2784
2564 rbd_dev = obj_request->img_request->rbd_dev;
2565 rbd_assert(rbd_dev->parent != NULL);
2566 /* rbd_read_finish(obj_request, obj_request->length); */ 2785 /* rbd_read_finish(obj_request, obj_request->length); */
2567 img_request = rbd_img_request_create(rbd_dev->parent, 2786 img_request = rbd_parent_request_create(obj_request,
2568 obj_request->img_offset, 2787 obj_request->img_offset,
2569 obj_request->length, 2788 obj_request->length);
2570 false, true);
2571 result = -ENOMEM; 2789 result = -ENOMEM;
2572 if (!img_request) 2790 if (!img_request)
2573 goto out_err; 2791 goto out_err;
2574 2792
2575 rbd_obj_request_get(obj_request); 2793 if (obj_request->type == OBJ_REQUEST_BIO)
2576 img_request->obj_request = obj_request; 2794 result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
2577 2795 obj_request->bio_list);
2578 result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, 2796 else
2579 obj_request->bio_list); 2797 result = rbd_img_request_fill(img_request, OBJ_REQUEST_PAGES,
2798 obj_request->pages);
2580 if (result) 2799 if (result)
2581 goto out_err; 2800 goto out_err;
2582 2801
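
rbd_img_parent_read() previously asserted bio-backed requests only; it now accepts any valid type and picks the matching fill routine. A small sketch of the two-way dispatch, using stand-in types rather than the driver's object-request API:

    #include <stdio.h>

    enum obj_req_type { OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES };

    struct obj_req {
        enum obj_req_type type;
        void *bio_list;    /* valid when type == OBJ_REQUEST_BIO */
        void *pages;       /* valid when type == OBJ_REQUEST_PAGES */
    };

    static int fill(enum obj_req_type type, void *data)
    {
        printf("filling parent request from %s\n",
               type == OBJ_REQUEST_BIO ? "bios" : "pages");
        return data ? 0 : -1;
    }

    static int parent_read_fill(struct obj_req *req)
    {
        if (req->type == OBJ_REQUEST_BIO)
            return fill(OBJ_REQUEST_BIO, req->bio_list);
        return fill(OBJ_REQUEST_PAGES, req->pages);
    }

    int main(void)
    {
        int dummy;
        struct obj_req req = { .type = OBJ_REQUEST_PAGES, .pages = &dummy };

        return parent_read_fill(&req);
    }
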
@@ -2626,6 +2845,7 @@ out:
2626static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) 2845static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
2627{ 2846{
2628 struct rbd_device *rbd_dev = (struct rbd_device *)data; 2847 struct rbd_device *rbd_dev = (struct rbd_device *)data;
2848 int ret;
2629 2849
2630 if (!rbd_dev) 2850 if (!rbd_dev)
2631 return; 2851 return;
@@ -2633,7 +2853,9 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
2633 dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, 2853 dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__,
2634 rbd_dev->header_name, (unsigned long long)notify_id, 2854 rbd_dev->header_name, (unsigned long long)notify_id,
2635 (unsigned int)opcode); 2855 (unsigned int)opcode);
2636 (void)rbd_dev_refresh(rbd_dev); 2856 ret = rbd_dev_refresh(rbd_dev);
2857 if (ret)
2858 rbd_warn(rbd_dev, ": header refresh error (%d)\n", ret);
2637 2859
2638 rbd_obj_notify_ack(rbd_dev, notify_id); 2860 rbd_obj_notify_ack(rbd_dev, notify_id);
2639} 2861}
@@ -2642,7 +2864,7 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data)
2642 * Request sync osd watch/unwatch. The value of "start" determines 2864 * Request sync osd watch/unwatch. The value of "start" determines
2643 * whether a watch request is being initiated or torn down. 2865 * whether a watch request is being initiated or torn down.
2644 */ 2866 */
2645static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) 2867static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, bool start)
2646{ 2868{
2647 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; 2869 struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
2648 struct rbd_obj_request *obj_request; 2870 struct rbd_obj_request *obj_request;
@@ -2676,7 +2898,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start)
2676 rbd_dev->watch_request->osd_req); 2898 rbd_dev->watch_request->osd_req);
2677 2899
2678 osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH, 2900 osd_req_op_watch_init(obj_request->osd_req, 0, CEPH_OSD_OP_WATCH,
2679 rbd_dev->watch_event->cookie, 0, start); 2901 rbd_dev->watch_event->cookie, 0, start ? 1 : 0);
2680 rbd_osd_req_format_write(obj_request); 2902 rbd_osd_req_format_write(obj_request);
2681 2903
2682 ret = rbd_obj_request_submit(osdc, obj_request); 2904 ret = rbd_obj_request_submit(osdc, obj_request);
@@ -2869,9 +3091,16 @@ static void rbd_request_fn(struct request_queue *q)
2869 goto end_request; /* Shouldn't happen */ 3091 goto end_request; /* Shouldn't happen */
2870 } 3092 }
2871 3093
3094 result = -EIO;
3095 if (offset + length > rbd_dev->mapping.size) {
3096 rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
3097 offset, length, rbd_dev->mapping.size);
3098 goto end_request;
3099 }
3100
2872 result = -ENOMEM; 3101 result = -ENOMEM;
2873 img_request = rbd_img_request_create(rbd_dev, offset, length, 3102 img_request = rbd_img_request_create(rbd_dev, offset, length,
2874 write_request, false); 3103 write_request);
2875 if (!img_request) 3104 if (!img_request)
2876 goto end_request; 3105 goto end_request;
2877 3106
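
The new guard rejects I/O that extends past the mapped size before any image request is allocated. A standalone sketch of the same bounds check; the subtraction form below is a defensive variant of the hunk's direct offset + length comparison, written so the sum cannot wrap for extreme inputs:

    #include <stdio.h>
    #include <errno.h>

    static int check_extent(unsigned long long offset,
                            unsigned long long length,
                            unsigned long long size)
    {
        /* equivalent to offset + length > size, minus the overflow */
        if (offset > size || length > size - offset) {
            fprintf(stderr, "beyond EOD (%llu~%llu > %llu)\n",
                    offset, length, size);
            return -EIO;
        }
        return 0;
    }

    int main(void)
    {
        /* 900 + 200 runs past a 1000-sector mapping: expect -EIO */
        return check_extent(900, 200, 1000) ? 0 : 1;
    }
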
@@ -3022,17 +3251,11 @@ out:
3022} 3251}
3023 3252
3024/* 3253/*
3025 * Read the complete header for the given rbd device. 3254 * Read the complete header for the given rbd device. On successful
3026 * 3255 * return, the rbd_dev->header field will contain up-to-date
3027 * Returns a pointer to a dynamically-allocated buffer containing 3256 * information about the image.
3028 * the complete and validated header. Caller can pass the address
3029 * of a variable that will be filled in with the version of the
3030 * header object at the time it was read.
3031 *
3032 * Returns a pointer-coded errno if a failure occurs.
3033 */ 3257 */
3034static struct rbd_image_header_ondisk * 3258static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
3035rbd_dev_v1_header_read(struct rbd_device *rbd_dev)
3036{ 3259{
3037 struct rbd_image_header_ondisk *ondisk = NULL; 3260 struct rbd_image_header_ondisk *ondisk = NULL;
3038 u32 snap_count = 0; 3261 u32 snap_count = 0;
@@ -3057,22 +3280,22 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev)
3057 size += names_size; 3280 size += names_size;
3058 ondisk = kmalloc(size, GFP_KERNEL); 3281 ondisk = kmalloc(size, GFP_KERNEL);
3059 if (!ondisk) 3282 if (!ondisk)
3060 return ERR_PTR(-ENOMEM); 3283 return -ENOMEM;
3061 3284
3062 ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 3285 ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name,
3063 0, size, ondisk); 3286 0, size, ondisk);
3064 if (ret < 0) 3287 if (ret < 0)
3065 goto out_err; 3288 goto out;
3066 if ((size_t)ret < size) { 3289 if ((size_t)ret < size) {
3067 ret = -ENXIO; 3290 ret = -ENXIO;
3068 rbd_warn(rbd_dev, "short header read (want %zd got %d)", 3291 rbd_warn(rbd_dev, "short header read (want %zd got %d)",
3069 size, ret); 3292 size, ret);
3070 goto out_err; 3293 goto out;
3071 } 3294 }
3072 if (!rbd_dev_ondisk_valid(ondisk)) { 3295 if (!rbd_dev_ondisk_valid(ondisk)) {
3073 ret = -ENXIO; 3296 ret = -ENXIO;
3074 rbd_warn(rbd_dev, "invalid header"); 3297 rbd_warn(rbd_dev, "invalid header");
3075 goto out_err; 3298 goto out;
3076 } 3299 }
3077 3300
3078 names_size = le64_to_cpu(ondisk->snap_names_len); 3301 names_size = le64_to_cpu(ondisk->snap_names_len);
@@ -3080,85 +3303,13 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev)
3080 snap_count = le32_to_cpu(ondisk->snap_count); 3303 snap_count = le32_to_cpu(ondisk->snap_count);
3081 } while (snap_count != want_count); 3304 } while (snap_count != want_count);
3082 3305
3083 return ondisk; 3306 ret = rbd_header_from_disk(rbd_dev, ondisk);
3084 3307out:
3085out_err:
3086 kfree(ondisk);
3087
3088 return ERR_PTR(ret);
3089}
3090
3091/*
3092 * reload the ondisk header
3093 */
3094static int rbd_read_header(struct rbd_device *rbd_dev,
3095 struct rbd_image_header *header)
3096{
3097 struct rbd_image_header_ondisk *ondisk;
3098 int ret;
3099
3100 ondisk = rbd_dev_v1_header_read(rbd_dev);
3101 if (IS_ERR(ondisk))
3102 return PTR_ERR(ondisk);
3103 ret = rbd_header_from_disk(header, ondisk);
3104 kfree(ondisk); 3308 kfree(ondisk);
3105 3309
3106 return ret; 3310 return ret;
3107} 3311}
3108 3312
3109static void rbd_update_mapping_size(struct rbd_device *rbd_dev)
3110{
3111 if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
3112 return;
3113
3114 if (rbd_dev->mapping.size != rbd_dev->header.image_size) {
3115 sector_t size;
3116
3117 rbd_dev->mapping.size = rbd_dev->header.image_size;
3118 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
3119 dout("setting size to %llu sectors", (unsigned long long)size);
3120 set_capacity(rbd_dev->disk, size);
3121 }
3122}
3123
3124/*
3125 * only read the first part of the ondisk header, without the snaps info
3126 */
3127static int rbd_dev_v1_refresh(struct rbd_device *rbd_dev)
3128{
3129 int ret;
3130 struct rbd_image_header h;
3131
3132 ret = rbd_read_header(rbd_dev, &h);
3133 if (ret < 0)
3134 return ret;
3135
3136 down_write(&rbd_dev->header_rwsem);
3137
3138 /* Update image size, and check for resize of mapped image */
3139 rbd_dev->header.image_size = h.image_size;
3140 rbd_update_mapping_size(rbd_dev);
3141
3142 /* rbd_dev->header.object_prefix shouldn't change */
3143 kfree(rbd_dev->header.snap_sizes);
3144 kfree(rbd_dev->header.snap_names);
3145 /* osd requests may still refer to snapc */
3146 ceph_put_snap_context(rbd_dev->header.snapc);
3147
3148 rbd_dev->header.image_size = h.image_size;
3149 rbd_dev->header.snapc = h.snapc;
3150 rbd_dev->header.snap_names = h.snap_names;
3151 rbd_dev->header.snap_sizes = h.snap_sizes;
3152 /* Free the extra copy of the object prefix */
3153 if (strcmp(rbd_dev->header.object_prefix, h.object_prefix))
3154 rbd_warn(rbd_dev, "object prefix changed (ignoring)");
3155 kfree(h.object_prefix);
3156
3157 up_write(&rbd_dev->header_rwsem);
3158
3159 return ret;
3160}
3161
3162/* 3313/*
3163 * Clear the rbd device's EXISTS flag if the snapshot it's mapped to 3314 * Clear the rbd device's EXISTS flag if the snapshot it's mapped to
3164 * has disappeared from the (just updated) snapshot context. 3315 * has disappeared from the (just updated) snapshot context.
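
rbd_dev_v1_header_read() used to hand back an ERR_PTR-coded buffer that a separate rbd_read_header() then parsed; the consolidated rbd_dev_v1_header_info() parses in place, frees the temporary buffer on every path, and returns a plain errno. A compilable sketch of the resulting shape, with calloc/free and trivial stubs standing in for the kernel allocators and the real read/parse steps:

    #include <stdlib.h>
    #include <errno.h>

    struct ondisk { int snap_count; };

    static int read_ondisk(struct ondisk **out)
    {
        *out = calloc(1, sizeof(**out));
        return *out ? 0 : -ENOMEM;
    }

    static int parse_ondisk(const struct ondisk *od)
    {
        return od->snap_count < 0 ? -ENXIO : 0;
    }

    static int header_info(void)
    {
        struct ondisk *od;
        int ret = read_ondisk(&od);

        if (ret)
            return ret;
        ret = parse_ondisk(od);
        free(od);    /* freed on success and on error alike */
        return ret;
    }

    int main(void)
    {
        return header_info() ? 1 : 0;
    }
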
@@ -3180,26 +3331,29 @@ static void rbd_exists_validate(struct rbd_device *rbd_dev)
3180 3331
3181static int rbd_dev_refresh(struct rbd_device *rbd_dev) 3332static int rbd_dev_refresh(struct rbd_device *rbd_dev)
3182{ 3333{
3183 u64 image_size; 3334 u64 mapping_size;
3184 int ret; 3335 int ret;
3185 3336
3186 rbd_assert(rbd_image_format_valid(rbd_dev->image_format)); 3337 rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
3187 image_size = rbd_dev->header.image_size; 3338 mapping_size = rbd_dev->mapping.size;
3188 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); 3339 mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
3189 if (rbd_dev->image_format == 1) 3340 if (rbd_dev->image_format == 1)
3190 ret = rbd_dev_v1_refresh(rbd_dev); 3341 ret = rbd_dev_v1_header_info(rbd_dev);
3191 else 3342 else
3192 ret = rbd_dev_v2_refresh(rbd_dev); 3343 ret = rbd_dev_v2_header_info(rbd_dev);
3193 3344
3194 /* If it's a mapped snapshot, validate its EXISTS flag */ 3345 /* If it's a mapped snapshot, validate its EXISTS flag */
3195 3346
3196 rbd_exists_validate(rbd_dev); 3347 rbd_exists_validate(rbd_dev);
3197 mutex_unlock(&ctl_mutex); 3348 mutex_unlock(&ctl_mutex);
3198 if (ret) 3349 if (mapping_size != rbd_dev->mapping.size) {
3199 rbd_warn(rbd_dev, "got notification but failed to " 3350 sector_t size;
3200 " update snaps: %d\n", ret); 3351
3201 if (image_size != rbd_dev->header.image_size) 3352 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE;
3353 dout("setting size to %llu sectors", (unsigned long long)size);
3354 set_capacity(rbd_dev->disk, size);
3202 revalidate_disk(rbd_dev->disk); 3355 revalidate_disk(rbd_dev->disk);
3356 }
3203 3357
3204 return ret; 3358 return ret;
3205} 3359}
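
rbd_dev_refresh() now records mapping.size up front and only calls set_capacity()/revalidate_disk() when the refresh actually changed it, replacing the old image_size comparison. A toy sketch of that compare-then-notify pattern; SECTOR_SIZE, the device struct, and the refresh stub are assumptions for the demo:

    #include <stdio.h>

    #define SECTOR_SIZE 512ULL

    struct dev { unsigned long long mapping_size; };

    static void refresh(struct dev *d)
    {
        d->mapping_size = 4096;    /* stand-in for the header re-read */
    }

    int main(void)
    {
        struct dev d = { .mapping_size = 2048 };
        unsigned long long before = d.mapping_size;

        refresh(&d);
        if (before != d.mapping_size)
            printf("setting size to %llu sectors\n",
                   d.mapping_size / SECTOR_SIZE);
        return 0;
    }
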
@@ -3403,6 +3557,8 @@ static ssize_t rbd_image_refresh(struct device *dev,
3403 int ret; 3557 int ret;
3404 3558
3405 ret = rbd_dev_refresh(rbd_dev); 3559 ret = rbd_dev_refresh(rbd_dev);
3560 if (ret)
3561 rbd_warn(rbd_dev, ": manual header refresh error (%d)\n", ret);
3406 3562
3407 return ret < 0 ? ret : size; 3563 return ret < 0 ? ret : size;
3408} 3564}
@@ -3501,6 +3657,7 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
3501 3657
3502 spin_lock_init(&rbd_dev->lock); 3658 spin_lock_init(&rbd_dev->lock);
3503 rbd_dev->flags = 0; 3659 rbd_dev->flags = 0;
3660 atomic_set(&rbd_dev->parent_ref, 0);
3504 INIT_LIST_HEAD(&rbd_dev->node); 3661 INIT_LIST_HEAD(&rbd_dev->node);
3505 init_rwsem(&rbd_dev->header_rwsem); 3662 init_rwsem(&rbd_dev->header_rwsem);
3506 3663
@@ -3650,6 +3807,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3650 __le64 snapid; 3807 __le64 snapid;
3651 void *p; 3808 void *p;
3652 void *end; 3809 void *end;
3810 u64 pool_id;
3653 char *image_id; 3811 char *image_id;
3654 u64 overlap; 3812 u64 overlap;
3655 int ret; 3813 int ret;
@@ -3680,18 +3838,37 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3680 p = reply_buf; 3838 p = reply_buf;
3681 end = reply_buf + ret; 3839 end = reply_buf + ret;
3682 ret = -ERANGE; 3840 ret = -ERANGE;
3683 ceph_decode_64_safe(&p, end, parent_spec->pool_id, out_err); 3841 ceph_decode_64_safe(&p, end, pool_id, out_err);
3684 if (parent_spec->pool_id == CEPH_NOPOOL) 3842 if (pool_id == CEPH_NOPOOL) {
3843 /*
3844 * Either the parent never existed, or we have
3845 * record of it but the image got flattened so it no
3846 * longer has a parent. When the parent of a
3847 * layered image disappears we immediately set the
3848 * overlap to 0. The effect of this is that all new
3849 * requests will be treated as if the image had no
3850 * parent.
3851 */
3852 if (rbd_dev->parent_overlap) {
3853 rbd_dev->parent_overlap = 0;
3854 smp_mb();
3855 rbd_dev_parent_put(rbd_dev);
3856 pr_info("%s: clone image has been flattened\n",
3857 rbd_dev->disk->disk_name);
3858 }
3859
3685 goto out; /* No parent? No problem. */ 3860 goto out; /* No parent? No problem. */
3861 }
3686 3862
3687 /* The ceph file layout needs to fit pool id in 32 bits */ 3863 /* The ceph file layout needs to fit pool id in 32 bits */
3688 3864
3689 ret = -EIO; 3865 ret = -EIO;
3690 if (parent_spec->pool_id > (u64)U32_MAX) { 3866 if (pool_id > (u64)U32_MAX) {
3691 rbd_warn(NULL, "parent pool id too large (%llu > %u)\n", 3867 rbd_warn(NULL, "parent pool id too large (%llu > %u)\n",
3692 (unsigned long long)parent_spec->pool_id, U32_MAX); 3868 (unsigned long long)pool_id, U32_MAX);
3693 goto out_err; 3869 goto out_err;
3694 } 3870 }
3871 parent_spec->pool_id = pool_id;
3695 3872
3696 image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); 3873 image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
3697 if (IS_ERR(image_id)) { 3874 if (IS_ERR(image_id)) {
@@ -3702,9 +3879,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
3702 ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); 3879 ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err);
3703 ceph_decode_64_safe(&p, end, overlap, out_err); 3880 ceph_decode_64_safe(&p, end, overlap, out_err);
3704 3881
3705 rbd_dev->parent_overlap = overlap; 3882 if (overlap) {
3706 rbd_dev->parent_spec = parent_spec; 3883 rbd_spec_put(rbd_dev->parent_spec);
3707 parent_spec = NULL; /* rbd_dev now owns this */ 3884 rbd_dev->parent_spec = parent_spec;
3885 parent_spec = NULL; /* rbd_dev now owns this */
3886 rbd_dev->parent_overlap = overlap;
3887 } else {
3888 rbd_warn(rbd_dev, "ignoring parent of clone with overlap 0\n");
3889 }
3708out: 3890out:
3709 ret = 0; 3891 ret = 0;
3710out_err: 3892out_err:
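
Parent info is now decoded into locals and committed only when it validates: CEPH_NOPOOL with a previously nonzero overlap means the clone was flattened, and an overlap of zero means the recorded parent should be ignored. A sketch of that decode-into-locals flow, with stand-ins for the real decode helpers and device state:

    #include <stdio.h>

    #define CEPH_NOPOOL ((unsigned long long)-1)

    struct dev {
        unsigned long long parent_pool;
        unsigned long long parent_overlap;
    };

    static void set_parent_info(struct dev *d, unsigned long long pool_id,
                                unsigned long long overlap)
    {
        if (pool_id == CEPH_NOPOOL) {
            /* parent gone; a stale overlap means a flatten */
            if (d->parent_overlap) {
                d->parent_overlap = 0;
                printf("clone image has been flattened\n");
            }
            return;
        }
        if (!overlap) {
            printf("ignoring parent of clone with overlap 0\n");
            return;
        }
        d->parent_pool = pool_id;    /* commit only validated values */
        d->parent_overlap = overlap;
    }

    int main(void)
    {
        struct dev d = { .parent_overlap = 1 << 20 };

        set_parent_info(&d, CEPH_NOPOOL, 0);    /* simulate a flatten */
        return d.parent_overlap ? 1 : 0;
    }
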
@@ -4002,6 +4184,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
4002 for (i = 0; i < snap_count; i++) 4184 for (i = 0; i < snap_count; i++)
4003 snapc->snaps[i] = ceph_decode_64(&p); 4185 snapc->snaps[i] = ceph_decode_64(&p);
4004 4186
4187 ceph_put_snap_context(rbd_dev->header.snapc);
4005 rbd_dev->header.snapc = snapc; 4188 rbd_dev->header.snapc = snapc;
4006 4189
4007 dout(" snap context seq = %llu, snap_count = %u\n", 4190 dout(" snap context seq = %llu, snap_count = %u\n",
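
The one-line snap-context fix is the classic put-before-replace pattern: drop the reference held on the old object before overwriting the pointer, or the old context leaks on every refresh. A toy refcount model of the same idea; the real code uses ceph_put_snap_context(), everything below is a stand-in:

    #include <stdio.h>
    #include <stdlib.h>

    struct snapc { int ref; };

    static struct snapc *snapc_get_new(void)
    {
        struct snapc *s = calloc(1, sizeof(*s));

        if (s)
            s->ref = 1;
        return s;
    }

    static void snapc_put(struct snapc *s)
    {
        if (s && --s->ref == 0)
            free(s);
    }

    static void install_snapc(struct snapc **slot, struct snapc *new)
    {
        snapc_put(*slot);    /* without this, the old one leaks */
        *slot = new;
    }

    int main(void)
    {
        struct snapc *slot = snapc_get_new();

        install_snapc(&slot, snapc_get_new());
        snapc_put(slot);
        return 0;
    }
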
@@ -4053,21 +4236,56 @@ out:
4053 return snap_name; 4236 return snap_name;
4054} 4237}
4055 4238
4056static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev) 4239static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
4057{ 4240{
4241 bool first_time = rbd_dev->header.object_prefix == NULL;
4058 int ret; 4242 int ret;
4059 4243
4060 down_write(&rbd_dev->header_rwsem); 4244 down_write(&rbd_dev->header_rwsem);
4061 4245
4246 if (first_time) {
4247 ret = rbd_dev_v2_header_onetime(rbd_dev);
4248 if (ret)
4249 goto out;
4250 }
4251
4252 /*
4253 * If the image supports layering, get the parent info. We
4254 * need to probe the first time regardless. Thereafter we
4255 * only need to if there's a parent, to see if it has
4256 * disappeared due to the mapped image getting flattened.
4257 */
4258 if (rbd_dev->header.features & RBD_FEATURE_LAYERING &&
4259 (first_time || rbd_dev->parent_spec)) {
4260 bool warn;
4261
4262 ret = rbd_dev_v2_parent_info(rbd_dev);
4263 if (ret)
4264 goto out;
4265
4266 /*
4267 * Print a warning if this is the initial probe and
4268 * the image has a parent. Don't print it if the
4269 * image now being probed is itself a parent. We
4270 * can tell at this point because we won't know its
4271 * pool name yet (just its pool id).
4272 */
4273 warn = rbd_dev->parent_spec && rbd_dev->spec->pool_name;
4274 if (first_time && warn)
4275 rbd_warn(rbd_dev, "WARNING: kernel layering "
4276 "is EXPERIMENTAL!");
4277 }
4278
4062 ret = rbd_dev_v2_image_size(rbd_dev); 4279 ret = rbd_dev_v2_image_size(rbd_dev);
4063 if (ret) 4280 if (ret)
4064 goto out; 4281 goto out;
4065 rbd_update_mapping_size(rbd_dev); 4282
4283 if (rbd_dev->spec->snap_id == CEPH_NOSNAP)
4284 if (rbd_dev->mapping.size != rbd_dev->header.image_size)
4285 rbd_dev->mapping.size = rbd_dev->header.image_size;
4066 4286
4067 ret = rbd_dev_v2_snap_context(rbd_dev); 4287 ret = rbd_dev_v2_snap_context(rbd_dev);
4068 dout("rbd_dev_v2_snap_context returned %d\n", ret); 4288 dout("rbd_dev_v2_snap_context returned %d\n", ret);
4069 if (ret)
4070 goto out;
4071out: 4289out:
4072 up_write(&rbd_dev->header_rwsem); 4290 up_write(&rbd_dev->header_rwsem);
4073 4291
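
rbd_dev_v2_refresh() and the old probe path merge into rbd_dev_v2_header_info(), keyed off whether header.object_prefix has been filled in yet: one-time work runs only on the first call, and parent info is re-read on layered images to catch flattening. A minimal sketch of using an already-populated field as the first-time marker; the helpers are illustrative only:

    #include <stdio.h>
    #include <stdbool.h>

    struct dev {
        char *object_prefix;    /* NULL until the one-time probe ran */
        bool layered;
    };

    static int onetime_setup(struct dev *d)
    {
        static char prefix[] = "rb.0.fake";

        d->object_prefix = prefix;
        return 0;
    }

    static int header_info(struct dev *d)
    {
        bool first_time = d->object_prefix == NULL;
        int ret = 0;

        if (first_time)
            ret = onetime_setup(d);
        if (!ret && d->layered)
            printf("%s parent info\n",
                   first_time ? "probing" : "re-checking");
        return ret;
    }

    int main(void)
    {
        struct dev d = { .layered = true };

        header_info(&d);    /* probe */
        header_info(&d);    /* refresh */
        return 0;
    }
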
@@ -4484,16 +4702,18 @@ out:
4484 return ret; 4702 return ret;
4485} 4703}
4486 4704
4487/* Undo whatever state changes are made by v1 or v2 image probe */ 4705/*
4488 4706 * Undo whatever state changes are made by v1 or v2 header info
4707 * call.
4708 */
4489static void rbd_dev_unprobe(struct rbd_device *rbd_dev) 4709static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
4490{ 4710{
4491 struct rbd_image_header *header; 4711 struct rbd_image_header *header;
4492 4712
4493 rbd_dev_remove_parent(rbd_dev); 4713 /* Drop parent reference unless it's already been done (or none) */
4494 rbd_spec_put(rbd_dev->parent_spec); 4714
4495 rbd_dev->parent_spec = NULL; 4715 if (rbd_dev->parent_overlap)
4496 rbd_dev->parent_overlap = 0; 4716 rbd_dev_parent_put(rbd_dev);
4497 4717
4498 /* Free dynamic fields from the header, then zero it out */ 4718 /* Free dynamic fields from the header, then zero it out */
4499 4719
@@ -4505,72 +4725,22 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
4505 memset(header, 0, sizeof (*header)); 4725 memset(header, 0, sizeof (*header));
4506} 4726}
4507 4727
4508static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) 4728static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
4509{ 4729{
4510 int ret; 4730 int ret;
4511 4731
4512 /* Populate rbd image metadata */
4513
4514 ret = rbd_read_header(rbd_dev, &rbd_dev->header);
4515 if (ret < 0)
4516 goto out_err;
4517
4518 /* Version 1 images have no parent (no layering) */
4519
4520 rbd_dev->parent_spec = NULL;
4521 rbd_dev->parent_overlap = 0;
4522
4523 dout("discovered version 1 image, header name is %s\n",
4524 rbd_dev->header_name);
4525
4526 return 0;
4527
4528out_err:
4529 kfree(rbd_dev->header_name);
4530 rbd_dev->header_name = NULL;
4531 kfree(rbd_dev->spec->image_id);
4532 rbd_dev->spec->image_id = NULL;
4533
4534 return ret;
4535}
4536
4537static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
4538{
4539 int ret;
4540
4541 ret = rbd_dev_v2_image_size(rbd_dev);
4542 if (ret)
4543 goto out_err;
4544
4545 /* Get the object prefix (a.k.a. block_name) for the image */
4546
4547 ret = rbd_dev_v2_object_prefix(rbd_dev); 4732 ret = rbd_dev_v2_object_prefix(rbd_dev);
4548 if (ret) 4733 if (ret)
4549 goto out_err; 4734 goto out_err;
4550 4735
4551 /* Get the and check features for the image */ 4736 /*
4552 4737 * Get the and check features for the image. Currently the
4738 * features are assumed to never change.
4739 */
4553 ret = rbd_dev_v2_features(rbd_dev); 4740 ret = rbd_dev_v2_features(rbd_dev);
4554 if (ret) 4741 if (ret)
4555 goto out_err; 4742 goto out_err;
4556 4743
4557 /* If the image supports layering, get the parent info */
4558
4559 if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
4560 ret = rbd_dev_v2_parent_info(rbd_dev);
4561 if (ret)
4562 goto out_err;
4563
4564 /*
4565 * Don't print a warning for parent images. We can
4566 * tell at this point because we won't know its pool
4567 * name yet (just its pool id).
4568 */
4569 if (rbd_dev->spec->pool_name)
4570 rbd_warn(rbd_dev, "WARNING: kernel layering "
4571 "is EXPERIMENTAL!");
4572 }
4573
4574 /* If the image supports fancy striping, get its parameters */ 4744 /* If the image supports fancy striping, get its parameters */
4575 4745
4576 if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) { 4746 if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
@@ -4578,28 +4748,11 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev)
4578 if (ret < 0) 4748 if (ret < 0)
4579 goto out_err; 4749 goto out_err;
4580 } 4750 }
4581 4751 /* No support for crypto and compression type format 2 images */
4582 /* crypto and compression type aren't (yet) supported for v2 images */
4583
4584 rbd_dev->header.crypt_type = 0;
4585 rbd_dev->header.comp_type = 0;
4586
4587 /* Get the snapshot context, plus the header version */
4588
4589 ret = rbd_dev_v2_snap_context(rbd_dev);
4590 if (ret)
4591 goto out_err;
4592
4593 dout("discovered version 2 image, header name is %s\n",
4594 rbd_dev->header_name);
4595 4752
4596 return 0; 4753 return 0;
4597out_err: 4754out_err:
4598 rbd_dev->parent_overlap = 0; 4755 rbd_dev->header.features = 0;
4599 rbd_spec_put(rbd_dev->parent_spec);
4600 rbd_dev->parent_spec = NULL;
4601 kfree(rbd_dev->header_name);
4602 rbd_dev->header_name = NULL;
4603 kfree(rbd_dev->header.object_prefix); 4756 kfree(rbd_dev->header.object_prefix);
4604 rbd_dev->header.object_prefix = NULL; 4757 rbd_dev->header.object_prefix = NULL;
4605 4758
@@ -4628,15 +4781,16 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
4628 if (!parent) 4781 if (!parent)
4629 goto out_err; 4782 goto out_err;
4630 4783
4631 ret = rbd_dev_image_probe(parent); 4784 ret = rbd_dev_image_probe(parent, false);
4632 if (ret < 0) 4785 if (ret < 0)
4633 goto out_err; 4786 goto out_err;
4634 rbd_dev->parent = parent; 4787 rbd_dev->parent = parent;
4788 atomic_set(&rbd_dev->parent_ref, 1);
4635 4789
4636 return 0; 4790 return 0;
4637out_err: 4791out_err:
4638 if (parent) { 4792 if (parent) {
4639 rbd_spec_put(rbd_dev->parent_spec); 4793 rbd_dev_unparent(rbd_dev);
4640 kfree(rbd_dev->header_name); 4794 kfree(rbd_dev->header_name);
4641 rbd_dev_destroy(parent); 4795 rbd_dev_destroy(parent);
4642 } else { 4796 } else {
@@ -4651,10 +4805,6 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
4651{ 4805{
4652 int ret; 4806 int ret;
4653 4807
4654 ret = rbd_dev_mapping_set(rbd_dev);
4655 if (ret)
4656 return ret;
4657
4658 /* generate unique id: find highest unique id, add one */ 4808 /* generate unique id: find highest unique id, add one */
4659 rbd_dev_id_get(rbd_dev); 4809 rbd_dev_id_get(rbd_dev);
4660 4810
@@ -4676,13 +4826,17 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
4676 if (ret) 4826 if (ret)
4677 goto err_out_blkdev; 4827 goto err_out_blkdev;
4678 4828
4679 ret = rbd_bus_add_dev(rbd_dev); 4829 ret = rbd_dev_mapping_set(rbd_dev);
4680 if (ret) 4830 if (ret)
4681 goto err_out_disk; 4831 goto err_out_disk;
4832 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
4833
4834 ret = rbd_bus_add_dev(rbd_dev);
4835 if (ret)
4836 goto err_out_mapping;
4682 4837
4683 /* Everything's ready. Announce the disk to the world. */ 4838 /* Everything's ready. Announce the disk to the world. */
4684 4839
4685 set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
4686 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 4840 set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
4687 add_disk(rbd_dev->disk); 4841 add_disk(rbd_dev->disk);
4688 4842
@@ -4691,6 +4845,8 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
4691 4845
4692 return ret; 4846 return ret;
4693 4847
4848err_out_mapping:
4849 rbd_dev_mapping_clear(rbd_dev);
4694err_out_disk: 4850err_out_disk:
4695 rbd_free_disk(rbd_dev); 4851 rbd_free_disk(rbd_dev);
4696err_out_blkdev: 4852err_out_blkdev:
@@ -4731,12 +4887,7 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
4731 4887
4732static void rbd_dev_image_release(struct rbd_device *rbd_dev) 4888static void rbd_dev_image_release(struct rbd_device *rbd_dev)
4733{ 4889{
4734 int ret;
4735
4736 rbd_dev_unprobe(rbd_dev); 4890 rbd_dev_unprobe(rbd_dev);
4737 ret = rbd_dev_header_watch_sync(rbd_dev, 0);
4738 if (ret)
4739 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
4740 kfree(rbd_dev->header_name); 4891 kfree(rbd_dev->header_name);
4741 rbd_dev->header_name = NULL; 4892 rbd_dev->header_name = NULL;
4742 rbd_dev->image_format = 0; 4893 rbd_dev->image_format = 0;
@@ -4748,18 +4899,20 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
4748 4899
4749/* 4900/*
4750 * Probe for the existence of the header object for the given rbd 4901 * Probe for the existence of the header object for the given rbd
4751 * device. For format 2 images this includes determining the image 4902 * device. If this image is the one being mapped (i.e., not a
4752 * id. 4903 * parent), initiate a watch on its header object before using that
4904 * object to get detailed information about the rbd image.
4753 */ 4905 */
4754static int rbd_dev_image_probe(struct rbd_device *rbd_dev) 4906static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
4755{ 4907{
4756 int ret; 4908 int ret;
4757 int tmp; 4909 int tmp;
4758 4910
4759 /* 4911 /*
4760 * Get the id from the image id object. If it's not a 4912 * Get the id from the image id object. Unless there's an
4761 * format 2 image, we'll get ENOENT back, and we'll assume 4913 * error, rbd_dev->spec->image_id will be filled in with
4762 * it's a format 1 image. 4914 * a dynamically-allocated string, and rbd_dev->image_format
4915 * will be set to either 1 or 2.
4763 */ 4916 */
4764 ret = rbd_dev_image_id(rbd_dev); 4917 ret = rbd_dev_image_id(rbd_dev);
4765 if (ret) 4918 if (ret)
@@ -4771,14 +4924,16 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev)
4771 if (ret) 4924 if (ret)
4772 goto err_out_format; 4925 goto err_out_format;
4773 4926
4774 ret = rbd_dev_header_watch_sync(rbd_dev, 1); 4927 if (mapping) {
4775 if (ret) 4928 ret = rbd_dev_header_watch_sync(rbd_dev, true);
4776 goto out_header_name; 4929 if (ret)
4930 goto out_header_name;
4931 }
4777 4932
4778 if (rbd_dev->image_format == 1) 4933 if (rbd_dev->image_format == 1)
4779 ret = rbd_dev_v1_probe(rbd_dev); 4934 ret = rbd_dev_v1_header_info(rbd_dev);
4780 else 4935 else
4781 ret = rbd_dev_v2_probe(rbd_dev); 4936 ret = rbd_dev_v2_header_info(rbd_dev);
4782 if (ret) 4937 if (ret)
4783 goto err_out_watch; 4938 goto err_out_watch;
4784 4939
@@ -4787,15 +4942,22 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev)
4787 goto err_out_probe; 4942 goto err_out_probe;
4788 4943
4789 ret = rbd_dev_probe_parent(rbd_dev); 4944 ret = rbd_dev_probe_parent(rbd_dev);
4790 if (!ret) 4945 if (ret)
4791 return 0; 4946 goto err_out_probe;
4947
4948 dout("discovered format %u image, header name is %s\n",
4949 rbd_dev->image_format, rbd_dev->header_name);
4792 4950
4951 return 0;
4793err_out_probe: 4952err_out_probe:
4794 rbd_dev_unprobe(rbd_dev); 4953 rbd_dev_unprobe(rbd_dev);
4795err_out_watch: 4954err_out_watch:
4796 tmp = rbd_dev_header_watch_sync(rbd_dev, 0); 4955 if (mapping) {
4797 if (tmp) 4956 tmp = rbd_dev_header_watch_sync(rbd_dev, false);
4798 rbd_warn(rbd_dev, "unable to tear down watch request\n"); 4957 if (tmp)
4958 rbd_warn(rbd_dev, "unable to tear down "
4959 "watch request (%d)\n", tmp);
4960 }
4799out_header_name: 4961out_header_name:
4800 kfree(rbd_dev->header_name); 4962 kfree(rbd_dev->header_name);
4801 rbd_dev->header_name = NULL; 4963 rbd_dev->header_name = NULL;
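
With the new mapping flag, only the image actually being mapped registers a header watch, and the error path tears the watch down under the same condition, so parent probes stay watch-free. A sketch of gating both setup and cleanup on one flag; stubs replace the osd watch machinery, and probe_header() is rigged to fail so the unwind runs:

    #include <stdio.h>
    #include <stdbool.h>

    static int watch_sync(bool start)
    {
        printf("%s watch\n", start ? "starting" : "stopping");
        return 0;
    }

    static int probe_header(void)
    {
        return -1;    /* force the error path for the demo */
    }

    static int image_probe(bool mapping)
    {
        int ret;

        if (mapping) {
            ret = watch_sync(true);
            if (ret)
                return ret;
        }
        ret = probe_header();
        if (ret && mapping)
            watch_sync(false);    /* undo only what was done */
        return ret;
    }

    int main(void)
    {
        return image_probe(true) ? 0 : 1;
    }
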
@@ -4819,6 +4981,7 @@ static ssize_t rbd_add(struct bus_type *bus,
4819 struct rbd_spec *spec = NULL; 4981 struct rbd_spec *spec = NULL;
4820 struct rbd_client *rbdc; 4982 struct rbd_client *rbdc;
4821 struct ceph_osd_client *osdc; 4983 struct ceph_osd_client *osdc;
4984 bool read_only;
4822 int rc = -ENOMEM; 4985 int rc = -ENOMEM;
4823 4986
4824 if (!try_module_get(THIS_MODULE)) 4987 if (!try_module_get(THIS_MODULE))
@@ -4828,13 +4991,15 @@ static ssize_t rbd_add(struct bus_type *bus,
4828 rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec); 4991 rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
4829 if (rc < 0) 4992 if (rc < 0)
4830 goto err_out_module; 4993 goto err_out_module;
4994 read_only = rbd_opts->read_only;
4995 kfree(rbd_opts);
4996 rbd_opts = NULL; /* done with this */
4831 4997
4832 rbdc = rbd_get_client(ceph_opts); 4998 rbdc = rbd_get_client(ceph_opts);
4833 if (IS_ERR(rbdc)) { 4999 if (IS_ERR(rbdc)) {
4834 rc = PTR_ERR(rbdc); 5000 rc = PTR_ERR(rbdc);
4835 goto err_out_args; 5001 goto err_out_args;
4836 } 5002 }
4837 ceph_opts = NULL; /* rbd_dev client now owns this */
4838 5003
4839 /* pick the pool */ 5004 /* pick the pool */
4840 osdc = &rbdc->client->osdc; 5005 osdc = &rbdc->client->osdc;
@@ -4858,27 +5023,29 @@ static ssize_t rbd_add(struct bus_type *bus,
4858 rbdc = NULL; /* rbd_dev now owns this */ 5023 rbdc = NULL; /* rbd_dev now owns this */
4859 spec = NULL; /* rbd_dev now owns this */ 5024 spec = NULL; /* rbd_dev now owns this */
4860 5025
4861 rbd_dev->mapping.read_only = rbd_opts->read_only; 5026 rc = rbd_dev_image_probe(rbd_dev, true);
4862 kfree(rbd_opts);
4863 rbd_opts = NULL; /* done with this */
4864
4865 rc = rbd_dev_image_probe(rbd_dev);
4866 if (rc < 0) 5027 if (rc < 0)
4867 goto err_out_rbd_dev; 5028 goto err_out_rbd_dev;
4868 5029
5030 /* If we are mapping a snapshot it must be marked read-only */
5031
5032 if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
5033 read_only = true;
5034 rbd_dev->mapping.read_only = read_only;
5035
4869 rc = rbd_dev_device_setup(rbd_dev); 5036 rc = rbd_dev_device_setup(rbd_dev);
4870 if (!rc) 5037 if (rc) {
4871 return count; 5038 rbd_dev_image_release(rbd_dev);
5039 goto err_out_module;
5040 }
5041
5042 return count;
4872 5043
4873 rbd_dev_image_release(rbd_dev);
4874err_out_rbd_dev: 5044err_out_rbd_dev:
4875 rbd_dev_destroy(rbd_dev); 5045 rbd_dev_destroy(rbd_dev);
4876err_out_client: 5046err_out_client:
4877 rbd_put_client(rbdc); 5047 rbd_put_client(rbdc);
4878err_out_args: 5048err_out_args:
4879 if (ceph_opts)
4880 ceph_destroy_options(ceph_opts);
4881 kfree(rbd_opts);
4882 rbd_spec_put(spec); 5049 rbd_spec_put(spec);
4883err_out_module: 5050err_out_module:
4884 module_put(THIS_MODULE); 5051 module_put(THIS_MODULE);
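
rbd_add() now copies read_only out of the parsed options and frees them immediately instead of carrying the allocation through every error label, and a snapshot mapping is forced read-only after the probe. A sketch of that consume-early pattern; CEPH_NOSNAP and the opts struct here are stand-ins:

    #include <stdlib.h>
    #include <stdbool.h>

    #define CEPH_NOSNAP ((unsigned long long)-2)

    struct opts { bool read_only; };

    static bool take_read_only(struct opts **optsp)
    {
        bool ro = (*optsp)->read_only;

        free(*optsp);
        *optsp = NULL;    /* done with this */
        return ro;
    }

    int main(void)
    {
        struct opts *o = calloc(1, sizeof(*o));
        unsigned long long snap_id = 42;    /* pretend snapshot map */
        bool read_only;

        if (!o)
            return 1;
        read_only = take_read_only(&o);
        if (snap_id != CEPH_NOSNAP)
            read_only = true;    /* snapshots must be read-only */
        return read_only ? 0 : 1;
    }
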
@@ -4911,7 +5078,7 @@ static void rbd_dev_device_release(struct device *dev)
4911 5078
4912 rbd_free_disk(rbd_dev); 5079 rbd_free_disk(rbd_dev);
4913 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); 5080 clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags);
4914 rbd_dev_clear_mapping(rbd_dev); 5081 rbd_dev_mapping_clear(rbd_dev);
4915 unregister_blkdev(rbd_dev->major, rbd_dev->name); 5082 unregister_blkdev(rbd_dev->major, rbd_dev->name);
4916 rbd_dev->major = 0; 5083 rbd_dev->major = 0;
4917 rbd_dev_id_put(rbd_dev); 5084 rbd_dev_id_put(rbd_dev);
@@ -4978,10 +5145,13 @@ static ssize_t rbd_remove(struct bus_type *bus,
4978 spin_unlock_irq(&rbd_dev->lock); 5145 spin_unlock_irq(&rbd_dev->lock);
4979 if (ret < 0) 5146 if (ret < 0)
4980 goto done; 5147 goto done;
4981 ret = count;
4982 rbd_bus_del_dev(rbd_dev); 5148 rbd_bus_del_dev(rbd_dev);
5149 ret = rbd_dev_header_watch_sync(rbd_dev, false);
5150 if (ret)
5151 rbd_warn(rbd_dev, "failed to cancel watch event (%d)\n", ret);
4983 rbd_dev_image_release(rbd_dev); 5152 rbd_dev_image_release(rbd_dev);
4984 module_put(THIS_MODULE); 5153 module_put(THIS_MODULE);
5154 ret = count;
4985done: 5155done:
4986 mutex_unlock(&ctl_mutex); 5156 mutex_unlock(&ctl_mutex);
4987 5157
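
In rbd_remove(), success (ret = count) is now recorded only after the watch teardown and image release have run; a teardown failure is logged but does not fail the removal. A stub model of that ordering, with illustrative names throughout:

    #include <stdio.h>

    static int teardown_watch(void)
    {
        return 0;    /* pretend the watch was cancelled cleanly */
    }

    static long do_remove(long count)
    {
        int werr = teardown_watch();

        if (werr)
            fprintf(stderr, "failed to cancel watch event (%d)\n", werr);
        /* ...image release would run here... */
        return count;    /* success is recorded last */
    }

    int main(void)
    {
        return do_remove(4) == 4 ? 0 : 1;
    }
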
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index f8ef15f37c5e..3fd130fdfbc1 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -1160,8 +1160,7 @@ static int ace_probe(struct platform_device *dev)
1160 dev_dbg(&dev->dev, "ace_probe(%p)\n", dev); 1160 dev_dbg(&dev->dev, "ace_probe(%p)\n", dev);
1161 1161
1162 /* device id and bus width */ 1162 /* device id and bus width */
1163 of_property_read_u32(dev->dev.of_node, "port-number", &id); 1163 if (of_property_read_u32(dev->dev.of_node, "port-number", &id))
1164 if (id < 0)
1165 id = 0; 1164 id = 0;
1166 if (of_find_property(dev->dev.of_node, "8-bit", NULL)) 1165 if (of_find_property(dev->dev.of_node, "8-bit", NULL))
1167 bus_width = ACE_BUS_WIDTH_8; 1166 bus_width = ACE_BUS_WIDTH_8;
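
The xsysace fix closes a dead test: once id is an unsigned u32, "id < 0" can never be true, so a missing "port-number" property previously left id unchecked. The new code keys off the return value of of_property_read_u32() instead. A userspace sketch of the fixed shape, with a stand-in for the OF helper:

    #include <stdio.h>
    #include <stdint.h>
    #include <errno.h>

    static int fake_read_u32(int present, uint32_t *out)
    {
        if (!present)
            return -EINVAL;    /* property absent */
        *out = 7;
        return 0;
    }

    int main(void)
    {
        uint32_t id;

        /* check the helper's return value, not "id < 0" */
        if (fake_read_u32(0, &id))
            id = 0;
        printf("port %u\n", id);
        return 0;
    }
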