diff options
author | Dan Williams <dan.j.williams@intel.com> | 2016-01-10 10:53:55 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2016-01-10 10:53:55 -0500 |
commit | 8b63b6bfc1a551acf154061699028c7032d7890c (patch) | |
tree | 16882e9bc9e35eacb870a6d8a71617e579c4ffdc /fs/block_dev.c | |
parent | e07ecd76d4db7bda1e9495395b2110a3fe28845a (diff) | |
parent | 55f5560d8c18fe33fc169f8d244a9247dcac7612 (diff) |
Merge branch 'for-4.5/block-dax' into for-4.5/libnvdimm
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- | fs/block_dev.c | 131 |
1 file changed, 113 insertions, 18 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index c25639e907bd..5c0b2cba870e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock, | |||
156 | return 0; | 156 | return 0; |
157 | } | 157 | } |
158 | 158 | ||
159 | static struct inode *bdev_file_inode(struct file *file) | ||
160 | { | ||
161 | return file->f_mapping->host; | ||
162 | } | ||
163 | |||
159 | static ssize_t | 164 | static ssize_t |
160 | blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) | 165 | blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) |
161 | { | 166 | { |
162 | struct file *file = iocb->ki_filp; | 167 | struct file *file = iocb->ki_filp; |
163 | struct inode *inode = file->f_mapping->host; | 168 | struct inode *inode = bdev_file_inode(file); |
164 | 169 | ||
165 | if (IS_DAX(inode)) | 170 | if (IS_DAX(inode)) |
166 | return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, | 171 | return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, |
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, | |||
338 | */ | 343 | */ |
339 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) | 344 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) |
340 | { | 345 | { |
341 | struct inode *bd_inode = file->f_mapping->host; | 346 | struct inode *bd_inode = bdev_file_inode(file); |
342 | loff_t retval; | 347 | loff_t retval; |
343 | 348 | ||
344 | mutex_lock(&bd_inode->i_mutex); | 349 | mutex_lock(&bd_inode->i_mutex); |
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence) | |||
349 | 354 | ||
350 | int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) | 355 | int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) |
351 | { | 356 | { |
352 | struct inode *bd_inode = filp->f_mapping->host; | 357 | struct inode *bd_inode = bdev_file_inode(filp); |
353 | struct block_device *bdev = I_BDEV(bd_inode); | 358 | struct block_device *bdev = I_BDEV(bd_inode); |
354 | int error; | 359 | int error; |
355 | 360 | ||
@@ -1230,8 +1235,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1230 | } | 1235 | } |
1231 | } | 1236 | } |
1232 | 1237 | ||
1233 | if (!ret) | 1238 | if (!ret) { |
1234 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1239 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
1240 | if (!blkdev_dax_capable(bdev)) | ||
1241 | bdev->bd_inode->i_flags &= ~S_DAX; | ||
1242 | } | ||
1235 | 1243 | ||
1236 | /* | 1244 | /* |
1237 | * If the device is invalidated, rescan partition | 1245 | * If the device is invalidated, rescan partition |
@@ -1245,6 +1253,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1245 | else if (ret == -ENOMEDIUM) | 1253 | else if (ret == -ENOMEDIUM) |
1246 | invalidate_partitions(disk, bdev); | 1254 | invalidate_partitions(disk, bdev); |
1247 | } | 1255 | } |
1256 | |||
1248 | if (ret) | 1257 | if (ret) |
1249 | goto out_clear; | 1258 | goto out_clear; |
1250 | } else { | 1259 | } else { |
@@ -1265,12 +1274,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1265 | goto out_clear; | 1274 | goto out_clear; |
1266 | } | 1275 | } |
1267 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); | 1276 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
1268 | /* | 1277 | if (!blkdev_dax_capable(bdev)) |
1269 | * If the partition is not aligned on a page | ||
1270 | * boundary, we can't do dax I/O to it. | ||
1271 | */ | ||
1272 | if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || | ||
1273 | (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) | ||
1274 | bdev->bd_inode->i_flags &= ~S_DAX; | 1278 | bdev->bd_inode->i_flags &= ~S_DAX; |
1275 | } | 1279 | } |
1276 | } else { | 1280 | } else { |
@@ -1523,11 +1527,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1523 | WARN_ON_ONCE(bdev->bd_holders); | 1527 | WARN_ON_ONCE(bdev->bd_holders); |
1524 | sync_blockdev(bdev); | 1528 | sync_blockdev(bdev); |
1525 | kill_bdev(bdev); | 1529 | kill_bdev(bdev); |
1530 | |||
1531 | bdev_write_inode(bdev); | ||
1526 | /* | 1532 | /* |
1527 | * ->release can cause the queue to disappear, so flush all | 1533 | * Detaching bdev inode from its wb in __destroy_inode() |
1528 | * dirty data before. | 1534 | * is too late: the queue which embeds its bdi (along with |
1535 | * root wb) can be gone as soon as we put_disk() below. | ||
1529 | */ | 1536 | */ |
1530 | bdev_write_inode(bdev); | 1537 | inode_detach_wb(bdev->bd_inode); |
1531 | } | 1538 | } |
1532 | if (bdev->bd_contains == bdev) { | 1539 | if (bdev->bd_contains == bdev) { |
1533 | if (disk->fops->release) | 1540 | if (disk->fops->release) |
@@ -1602,14 +1609,14 @@ EXPORT_SYMBOL(blkdev_put); | |||
1602 | 1609 | ||
1603 | static int blkdev_close(struct inode * inode, struct file * filp) | 1610 | static int blkdev_close(struct inode * inode, struct file * filp) |
1604 | { | 1611 | { |
1605 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1612 | struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); |
1606 | blkdev_put(bdev, filp->f_mode); | 1613 | blkdev_put(bdev, filp->f_mode); |
1607 | return 0; | 1614 | return 0; |
1608 | } | 1615 | } |
1609 | 1616 | ||
1610 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 1617 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
1611 | { | 1618 | { |
1612 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | 1619 | struct block_device *bdev = I_BDEV(bdev_file_inode(file)); |
1613 | fmode_t mode = file->f_mode; | 1620 | fmode_t mode = file->f_mode; |
1614 | 1621 | ||
1615 | /* | 1622 | /* |
@@ -1634,7 +1641,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
1634 | ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) | 1641 | ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) |
1635 | { | 1642 | { |
1636 | struct file *file = iocb->ki_filp; | 1643 | struct file *file = iocb->ki_filp; |
1637 | struct inode *bd_inode = file->f_mapping->host; | 1644 | struct inode *bd_inode = bdev_file_inode(file); |
1638 | loff_t size = i_size_read(bd_inode); | 1645 | loff_t size = i_size_read(bd_inode); |
1639 | struct blk_plug plug; | 1646 | struct blk_plug plug; |
1640 | ssize_t ret; | 1647 | ssize_t ret; |
@@ -1666,7 +1673,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter); | |||
1666 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | 1673 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) |
1667 | { | 1674 | { |
1668 | struct file *file = iocb->ki_filp; | 1675 | struct file *file = iocb->ki_filp; |
1669 | struct inode *bd_inode = file->f_mapping->host; | 1676 | struct inode *bd_inode = bdev_file_inode(file); |
1670 | loff_t size = i_size_read(bd_inode); | 1677 | loff_t size = i_size_read(bd_inode); |
1671 | loff_t pos = iocb->ki_pos; | 1678 | loff_t pos = iocb->ki_pos; |
1672 | 1679 | ||
@@ -1705,13 +1712,101 @@ static const struct address_space_operations def_blk_aops = { | |||
1705 | .is_dirty_writeback = buffer_check_dirty_writeback, | 1712 | .is_dirty_writeback = buffer_check_dirty_writeback, |
1706 | }; | 1713 | }; |
1707 | 1714 | ||
1715 | #ifdef CONFIG_FS_DAX | ||
1716 | /* | ||
1717 | * In the raw block case we do not need to contend with truncation nor | ||
1718 | * unwritten file extents. Without those concerns there is no need for | ||
1719 | * additional locking beyond the mmap_sem context that these routines | ||
1720 | * are already executing under. | ||
1721 | * | ||
1722 | * Note, there is no protection if the block device is dynamically | ||
1723 | * resized (partition grow/shrink) during a fault. A stable block device | ||
1724 | * size is already not enforced in the blkdev_direct_IO path. | ||
1725 | * | ||
1726 | * For DAX, it is the responsibility of the block device driver to | ||
1727 | * ensure the whole-disk device size is stable while requests are in | ||
1728 | * flight. | ||
1729 | * | ||
1730 | * Finally, unlike the filemap_page_mkwrite() case there is no | ||
1731 | * filesystem superblock to sync against freezing. We still include a | ||
1732 | * pfn_mkwrite callback for dax drivers to receive write fault | ||
1733 | * notifications. | ||
1734 | */ | ||
1735 | static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1736 | { | ||
1737 | return __dax_fault(vma, vmf, blkdev_get_block, NULL); | ||
1738 | } | ||
1739 | |||
1740 | static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | ||
1741 | pmd_t *pmd, unsigned int flags) | ||
1742 | { | ||
1743 | return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); | ||
1744 | } | ||
1745 | |||
1746 | static void blkdev_vm_open(struct vm_area_struct *vma) | ||
1747 | { | ||
1748 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | ||
1749 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1750 | |||
1751 | mutex_lock(&bd_inode->i_mutex); | ||
1752 | bdev->bd_map_count++; | ||
1753 | mutex_unlock(&bd_inode->i_mutex); | ||
1754 | } | ||
1755 | |||
1756 | static void blkdev_vm_close(struct vm_area_struct *vma) | ||
1757 | { | ||
1758 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | ||
1759 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1760 | |||
1761 | mutex_lock(&bd_inode->i_mutex); | ||
1762 | bdev->bd_map_count--; | ||
1763 | mutex_unlock(&bd_inode->i_mutex); | ||
1764 | } | ||
1765 | |||
1766 | static const struct vm_operations_struct blkdev_dax_vm_ops = { | ||
1767 | .open = blkdev_vm_open, | ||
1768 | .close = blkdev_vm_close, | ||
1769 | .fault = blkdev_dax_fault, | ||
1770 | .pmd_fault = blkdev_dax_pmd_fault, | ||
1771 | .pfn_mkwrite = blkdev_dax_fault, | ||
1772 | }; | ||
1773 | |||
1774 | static const struct vm_operations_struct blkdev_default_vm_ops = { | ||
1775 | .open = blkdev_vm_open, | ||
1776 | .close = blkdev_vm_close, | ||
1777 | .fault = filemap_fault, | ||
1778 | .map_pages = filemap_map_pages, | ||
1779 | }; | ||
1780 | |||
1781 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
1782 | { | ||
1783 | struct inode *bd_inode = bdev_file_inode(file); | ||
1784 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1785 | |||
1786 | file_accessed(file); | ||
1787 | mutex_lock(&bd_inode->i_mutex); | ||
1788 | bdev->bd_map_count++; | ||
1789 | if (IS_DAX(bd_inode)) { | ||
1790 | vma->vm_ops = &blkdev_dax_vm_ops; | ||
1791 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | ||
1792 | } else { | ||
1793 | vma->vm_ops = &blkdev_default_vm_ops; | ||
1794 | } | ||
1795 | mutex_unlock(&bd_inode->i_mutex); | ||
1796 | |||
1797 | return 0; | ||
1798 | } | ||
1799 | #else | ||
1800 | #define blkdev_mmap generic_file_mmap | ||
1801 | #endif | ||
1802 | |||
1708 | const struct file_operations def_blk_fops = { | 1803 | const struct file_operations def_blk_fops = { |
1709 | .open = blkdev_open, | 1804 | .open = blkdev_open, |
1710 | .release = blkdev_close, | 1805 | .release = blkdev_close, |
1711 | .llseek = block_llseek, | 1806 | .llseek = block_llseek, |
1712 | .read_iter = blkdev_read_iter, | 1807 | .read_iter = blkdev_read_iter, |
1713 | .write_iter = blkdev_write_iter, | 1808 | .write_iter = blkdev_write_iter, |
1714 | .mmap = generic_file_mmap, | 1809 | .mmap = blkdev_mmap, |
1715 | .fsync = blkdev_fsync, | 1810 | .fsync = blkdev_fsync, |
1716 | .unlocked_ioctl = block_ioctl, | 1811 | .unlocked_ioctl = block_ioctl, |
1717 | #ifdef CONFIG_COMPAT | 1812 | #ifdef CONFIG_COMPAT |