about summary refs log tree commit diff stats
path: root/fs/block_dev.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-01-13 22:15:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-13 22:15:14 -0500
commit d080827f850ba4df5b955d5ca8c8c0fc92fe18c0 (patch)
tree 37262315200bbbe50bdd64ce3011951a92855159 /fs/block_dev.c
parent cbd88cd4c07f9361914ab7fd7e21c9227986fe68 (diff)
parent 8b63b6bfc1a551acf154061699028c7032d7890c (diff)
Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:

"The bulk of this has appeared in -next and independently received a build success notification from the kbuild robot. The 'for-4.5/block-dax' topic branch was rebased over the weekend to drop the "block device end-of-life" rework that Al would like to see re-implemented with a notifier, and to address bug reports against the badblocks integration.

There is pending feedback against "libnvdimm: Add a poison list and export badblocks" received last week. Linda identified some localized fixups that we will handle incrementally.

Summary:

- Media error handling: The 'badblocks' implementation that originated in md-raid is up-levelled to a generic capability of a block device. This initial implementation is limited to being consulted in the pmem block-i/o path. Later, 'badblocks' will be consulted when creating dax mappings.

- Raw block device dax: For virtualization and other cases that want large contiguous mappings of persistent memory, add the capability to dax-mmap a block device directly.

- Increased /dev/mem restrictions: Add an option to treat all io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access while a driver is actively using an address range. This behavior is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be overridden by the existing "iomem=relaxed" kernel command line option.
- Miscellaneous fixes include a 'pfn'-device huge page alignment fix, block device shutdown crash fix, and other small libnvdimm fixes"

* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
  block: kill disk_{check|set|clear|alloc}_badblocks
  libnvdimm, pmem: nvdimm_read_bytes() badblocks support
  pmem, dax: disable dax in the presence of bad blocks
  pmem: fail io-requests to known bad blocks
  libnvdimm: convert to statically allocated badblocks
  libnvdimm: don't fail init for full badblocks list
  block, badblocks: introduce devm_init_badblocks
  block: clarify badblocks lifetime
  badblocks: rename badblocks_free to badblocks_exit
  libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
  libnvdimm: Add a poison list and export badblocks
  nfit_test: Enable DSMs for all test NFITs
  md: convert to use the generic badblocks code
  block: Add badblock management for gendisks
  badblocks: Add core badblock management code
  block: fix del_gendisk() vs blkdev_ioctl crash
  block: enable dax for raw block devices
  block: introduce bdev_file_inode()
  restrict /dev/mem to idle io memory ranges
  arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
  ...
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- fs/block_dev.c 122
1 files changed, 107 insertions, 15 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 01b8e0d4b4ff..d878e4860fb7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
156 return 0; 156 return 0;
157} 157}
158 158
159static struct inode *bdev_file_inode(struct file *file)
160{
161 return file->f_mapping->host;
162}
163
159static ssize_t 164static ssize_t
160blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) 165blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
161{ 166{
162 struct file *file = iocb->ki_filp; 167 struct file *file = iocb->ki_filp;
163 struct inode *inode = file->f_mapping->host; 168 struct inode *inode = bdev_file_inode(file);
164 169
165 if (IS_DAX(inode)) 170 if (IS_DAX(inode))
166 return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, 171 return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
338 */ 343 */
339static loff_t block_llseek(struct file *file, loff_t offset, int whence) 344static loff_t block_llseek(struct file *file, loff_t offset, int whence)
340{ 345{
341 struct inode *bd_inode = file->f_mapping->host; 346 struct inode *bd_inode = bdev_file_inode(file);
342 loff_t retval; 347 loff_t retval;
343 348
344 mutex_lock(&bd_inode->i_mutex); 349 mutex_lock(&bd_inode->i_mutex);
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
349 354
350int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) 355int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
351{ 356{
352 struct inode *bd_inode = filp->f_mapping->host; 357 struct inode *bd_inode = bdev_file_inode(filp);
353 struct block_device *bdev = I_BDEV(bd_inode); 358 struct block_device *bdev = I_BDEV(bd_inode);
354 int error; 359 int error;
355 360
@@ -1224,8 +1229,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1224 } 1229 }
1225 } 1230 }
1226 1231
1227 if (!ret) 1232 if (!ret) {
1228 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); 1233 bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
1234 if (!blkdev_dax_capable(bdev))
1235 bdev->bd_inode->i_flags &= ~S_DAX;
1236 }
1229 1237
1230 /* 1238 /*
1231 * If the device is invalidated, rescan partition 1239 * If the device is invalidated, rescan partition
@@ -1239,6 +1247,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1239 else if (ret == -ENOMEDIUM) 1247 else if (ret == -ENOMEDIUM)
1240 invalidate_partitions(disk, bdev); 1248 invalidate_partitions(disk, bdev);
1241 } 1249 }
1250
1242 if (ret) 1251 if (ret)
1243 goto out_clear; 1252 goto out_clear;
1244 } else { 1253 } else {
@@ -1259,12 +1268,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
1259 goto out_clear; 1268 goto out_clear;
1260 } 1269 }
1261 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); 1270 bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
1262 /* 1271 if (!blkdev_dax_capable(bdev))
1263 * If the partition is not aligned on a page
1264 * boundary, we can't do dax I/O to it.
1265 */
1266 if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
1267 (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
1268 bdev->bd_inode->i_flags &= ~S_DAX; 1272 bdev->bd_inode->i_flags &= ~S_DAX;
1269 } 1273 }
1270 } else { 1274 } else {
@@ -1599,14 +1603,14 @@ EXPORT_SYMBOL(blkdev_put);
1599 1603
1600static int blkdev_close(struct inode * inode, struct file * filp) 1604static int blkdev_close(struct inode * inode, struct file * filp)
1601{ 1605{
1602 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1606 struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
1603 blkdev_put(bdev, filp->f_mode); 1607 blkdev_put(bdev, filp->f_mode);
1604 return 0; 1608 return 0;
1605} 1609}
1606 1610
1607static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1611static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1608{ 1612{
1609 struct block_device *bdev = I_BDEV(file->f_mapping->host); 1613 struct block_device *bdev = I_BDEV(bdev_file_inode(file));
1610 fmode_t mode = file->f_mode; 1614 fmode_t mode = file->f_mode;
1611 1615
1612 /* 1616 /*
@@ -1631,7 +1635,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1631ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 1635ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
1632{ 1636{
1633 struct file *file = iocb->ki_filp; 1637 struct file *file = iocb->ki_filp;
1634 struct inode *bd_inode = file->f_mapping->host; 1638 struct inode *bd_inode = bdev_file_inode(file);
1635 loff_t size = i_size_read(bd_inode); 1639 loff_t size = i_size_read(bd_inode);
1636 struct blk_plug plug; 1640 struct blk_plug plug;
1637 ssize_t ret; 1641 ssize_t ret;
@@ -1663,7 +1667,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
1663ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) 1667ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
1664{ 1668{
1665 struct file *file = iocb->ki_filp; 1669 struct file *file = iocb->ki_filp;
1666 struct inode *bd_inode = file->f_mapping->host; 1670 struct inode *bd_inode = bdev_file_inode(file);
1667 loff_t size = i_size_read(bd_inode); 1671 loff_t size = i_size_read(bd_inode);
1668 loff_t pos = iocb->ki_pos; 1672 loff_t pos = iocb->ki_pos;
1669 1673
@@ -1702,13 +1706,101 @@ static const struct address_space_operations def_blk_aops = {
1702 .is_dirty_writeback = buffer_check_dirty_writeback, 1706 .is_dirty_writeback = buffer_check_dirty_writeback,
1703}; 1707};
1704 1708
1709#ifdef CONFIG_FS_DAX
1710/*
1711 * In the raw block case we do not need to contend with truncation nor
1712 * unwritten file extents. Without those concerns there is no need for
1713 * additional locking beyond the mmap_sem context that these routines
1714 * are already executing under.
1715 *
1716 * Note, there is no protection if the block device is dynamically
1717 * resized (partition grow/shrink) during a fault. A stable block device
1718 * size is already not enforced in the blkdev_direct_IO path.
1719 *
1720 * For DAX, it is the responsibility of the block device driver to
1721 * ensure the whole-disk device size is stable while requests are in
1722 * flight.
1723 *
1724 * Finally, unlike the filemap_page_mkwrite() case there is no
1725 * filesystem superblock to sync against freezing. We still include a
1726 * pfn_mkwrite callback for dax drivers to receive write fault
1727 * notifications.
1728 */
1729static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1730{
1731 return __dax_fault(vma, vmf, blkdev_get_block, NULL);
1732}
1733
1734static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
1735 pmd_t *pmd, unsigned int flags)
1736{
1737 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
1738}
1739
1740static void blkdev_vm_open(struct vm_area_struct *vma)
1741{
1742 struct inode *bd_inode = bdev_file_inode(vma->vm_file);
1743 struct block_device *bdev = I_BDEV(bd_inode);
1744
1745 mutex_lock(&bd_inode->i_mutex);
1746 bdev->bd_map_count++;
1747 mutex_unlock(&bd_inode->i_mutex);
1748}
1749
1750static void blkdev_vm_close(struct vm_area_struct *vma)
1751{
1752 struct inode *bd_inode = bdev_file_inode(vma->vm_file);
1753 struct block_device *bdev = I_BDEV(bd_inode);
1754
1755 mutex_lock(&bd_inode->i_mutex);
1756 bdev->bd_map_count--;
1757 mutex_unlock(&bd_inode->i_mutex);
1758}
1759
1760static const struct vm_operations_struct blkdev_dax_vm_ops = {
1761 .open = blkdev_vm_open,
1762 .close = blkdev_vm_close,
1763 .fault = blkdev_dax_fault,
1764 .pmd_fault = blkdev_dax_pmd_fault,
1765 .pfn_mkwrite = blkdev_dax_fault,
1766};
1767
1768static const struct vm_operations_struct blkdev_default_vm_ops = {
1769 .open = blkdev_vm_open,
1770 .close = blkdev_vm_close,
1771 .fault = filemap_fault,
1772 .map_pages = filemap_map_pages,
1773};
1774
1775static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1776{
1777 struct inode *bd_inode = bdev_file_inode(file);
1778 struct block_device *bdev = I_BDEV(bd_inode);
1779
1780 file_accessed(file);
1781 mutex_lock(&bd_inode->i_mutex);
1782 bdev->bd_map_count++;
1783 if (IS_DAX(bd_inode)) {
1784 vma->vm_ops = &blkdev_dax_vm_ops;
1785 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
1786 } else {
1787 vma->vm_ops = &blkdev_default_vm_ops;
1788 }
1789 mutex_unlock(&bd_inode->i_mutex);
1790
1791 return 0;
1792}
1793#else
1794#define blkdev_mmap generic_file_mmap
1795#endif
1796
1705const struct file_operations def_blk_fops = { 1797const struct file_operations def_blk_fops = {
1706 .open = blkdev_open, 1798 .open = blkdev_open,
1707 .release = blkdev_close, 1799 .release = blkdev_close,
1708 .llseek = block_llseek, 1800 .llseek = block_llseek,
1709 .read_iter = blkdev_read_iter, 1801 .read_iter = blkdev_read_iter,
1710 .write_iter = blkdev_write_iter, 1802 .write_iter = blkdev_write_iter,
1711 .mmap = generic_file_mmap, 1803 .mmap = blkdev_mmap,
1712 .fsync = blkdev_fsync, 1804 .fsync = blkdev_fsync,
1713 .unlocked_ioctl = block_ioctl, 1805 .unlocked_ioctl = block_ioctl,
1714#ifdef CONFIG_COMPAT 1806#ifdef CONFIG_COMPAT