Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--	fs/block_dev.c	131
1 file changed, 113 insertions, 18 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c25639e907bd..5c0b2cba870e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
+static struct inode *bdev_file_inode(struct file *file)
+{
+	return file->f_mapping->host;
+}
+
 static ssize_t
 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
 		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t retval;
 
 	mutex_lock(&bd_inode->i_mutex);
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 
 int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
-	struct inode *bd_inode = filp->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(filp);
 	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
@@ -1230,8 +1235,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			}
 		}
 
-		if (!ret)
+		if (!ret) {
 			bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+			if (!blkdev_dax_capable(bdev))
+				bdev->bd_inode->i_flags &= ~S_DAX;
+		}
 
 		/*
 		 * If the device is invalidated, rescan partition
@@ -1245,6 +1253,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			else if (ret == -ENOMEDIUM)
 				invalidate_partitions(disk, bdev);
 		}
+
 		if (ret)
 			goto out_clear;
 	} else {
@@ -1265,12 +1274,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				goto out_clear;
 			}
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
-			/*
-			 * If the partition is not aligned on a page
-			 * boundary, we can't do dax I/O to it.
-			 */
-			if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
-			    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+			if (!blkdev_dax_capable(bdev))
 				bdev->bd_inode->i_flags &= ~S_DAX;
 		}
 	} else {
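
The blkdev_dax_capable() helper called in the two hunks above is added outside fs/block_dev.c, so its body does not appear in this diff. As a rough, hypothetical sketch only — modeled on the open-coded page-alignment test removed above, plus a check that the driver actually provides ->direct_access — the centralized test could look something like this:

/* Hypothetical illustration, not the helper's actual definition. */
static bool blkdev_dax_capable(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;

	/* DAX needs the driver to translate sectors to memory addresses */
	if (!disk->fops->direct_access)
		return false;

	/*
	 * A device or partition that is not page-aligned cannot be mapped
	 * directly; this mirrors the open-coded check removed above.
	 */
	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
	    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
		return false;

	return true;
}

Whatever its exact body, the effect of the change is that the whole-disk path and the partition path in __blkdev_get() now share the same capability test, instead of only the partition path clearing S_DAX.
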
@@ -1523,11 +1527,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
+
+		bdev_write_inode(bdev);
 		/*
-		 * ->release can cause the queue to disappear, so flush all
-		 * dirty data before.
+		 * Detaching bdev inode from its wb in __destroy_inode()
+		 * is too late: the queue which embeds its bdi (along with
+		 * root wb) can be gone as soon as we put_disk() below.
 		 */
-		bdev_write_inode(bdev);
+		inode_detach_wb(bdev->bd_inode);
 	}
 	if (bdev->bd_contains == bdev) {
 		if (disk->fops->release)
@@ -1602,14 +1609,14 @@ EXPORT_SYMBOL(blkdev_put);
 
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
 	blkdev_put(bdev, filp->f_mode);
 	return 0;
 }
 
 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
 	fmode_t mode = file->f_mode;
 
 	/*
@@ -1634,7 +1641,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	struct blk_plug plug;
 	ssize_t ret;
@@ -1666,7 +1673,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
 ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	loff_t pos = iocb->ki_pos;
 
@@ -1705,13 +1712,101 @@ static const struct address_space_operations def_blk_aops = {
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
+#ifdef CONFIG_FS_DAX
+/*
+ * In the raw block case we do not need to contend with truncation nor
+ * unwritten file extents.  Without those concerns there is no need for
+ * additional locking beyond the mmap_sem context that these routines
+ * are already executing under.
+ *
+ * Note, there is no protection if the block device is dynamically
+ * resized (partition grow/shrink) during a fault.  A stable block device
+ * size is already not enforced in the blkdev_direct_IO path.
+ *
+ * For DAX, it is the responsibility of the block device driver to
+ * ensure the whole-disk device size is stable while requests are in
+ * flight.
+ *
+ * Finally, unlike the filemap_page_mkwrite() case there is no
+ * filesystem superblock to sync against freezing.  We still include a
+ * pfn_mkwrite callback for dax drivers to receive write fault
+ * notifications.
+ */
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
+}
+
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
+{
+	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
+}
+
+static void blkdev_vm_open(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static void blkdev_vm_close(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count--;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= blkdev_dax_fault,
+	.pmd_fault	= blkdev_dax_pmd_fault,
+	.pfn_mkwrite	= blkdev_dax_fault,
+};
+
+static const struct vm_operations_struct blkdev_default_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	file_accessed(file);
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	if (IS_DAX(bd_inode)) {
+		vma->vm_ops = &blkdev_dax_vm_ops;
+		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	} else {
+		vma->vm_ops = &blkdev_default_vm_ops;
+	}
+	mutex_unlock(&bd_inode->i_mutex);
+
+	return 0;
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= generic_file_mmap,
+	.mmap		= blkdev_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
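
To illustrate what the new .mmap handler enables from user space: a process can now map a DAX-capable raw block device and have faults served directly, while non-DAX devices keep the old filemap_fault() behaviour. A minimal sketch, assuming a persistent-memory namespace exposed at /dev/pmem0 (an example path, not something this patch creates):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	/* example device node; any DAX-capable block device would do */
	int fd = open("/dev/pmem0", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * With this patch, faults on the mapping are served by
	 * blkdev_dax_fault() when IS_DAX(bd_inode) is set, or by
	 * filemap_fault() otherwise; previously generic_file_mmap()
	 * always routed raw block device mappings through the page cache.
	 */
	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}

	memcpy(p, "hello", 5);	/* on DAX, stores bypass the page cache */

	munmap(p, 4096);
	close(fd);
	return 0;
}

Note also the bd_map_count bookkeeping in blkdev_vm_open()/blkdev_vm_close(): it is updated under i_mutex, presumably so that other parts of the patch set can tell whether a block device currently has live mappings.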