diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 22:15:14 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-13 22:15:14 -0500 |
| commit | d080827f850ba4df5b955d5ca8c8c0fc92fe18c0 (patch) | |
| tree | 37262315200bbbe50bdd64ce3011951a92855159 /fs/block_dev.c | |
| parent | cbd88cd4c07f9361914ab7fd7e21c9227986fe68 (diff) | |
| parent | 8b63b6bfc1a551acf154061699028c7032d7890c (diff) | |
Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"The bulk of this has appeared in -next and independently received a
build success notification from the kbuild robot. The 'for-4.5/block-dax'
topic branch was rebased over the weekend to drop the "block
device end-of-life" rework that Al would like to see re-implemented
with a notifier, and to address bug reports against the badblocks
integration.
There is pending feedback against "libnvdimm: Add a poison list and
export badblocks" received last week. Linda identified some localized
fixups that we will handle incrementally.
Summary:
- Media error handling: The 'badblocks' implementation that
originated in md-raid is up-levelled to a generic capability of a
block device. This initial implementation is limited to being
consulted in the pmem block-i/o path. Later, 'badblocks' will be
consulted when creating dax mappings.
- Raw block device dax: For virtualization and other cases that want
large contiguous mappings of persistent memory, add the capability
to dax-mmap a block device directly.
- Increased /dev/mem restrictions: Add an option to treat all
io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access
while a driver is actively using an address range. This behavior
is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be
overridden by the existing "iomem=relaxed" kernel command line
option.
- Miscellaneous fixes include a 'pfn'-device huge page alignment fix,
block device shutdown crash fix, and other small libnvdimm fixes"
* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
block: kill disk_{check|set|clear|alloc}_badblocks
libnvdimm, pmem: nvdimm_read_bytes() badblocks support
pmem, dax: disable dax in the presence of bad blocks
pmem: fail io-requests to known bad blocks
libnvdimm: convert to statically allocated badblocks
libnvdimm: don't fail init for full badblocks list
block, badblocks: introduce devm_init_badblocks
block: clarify badblocks lifetime
badblocks: rename badblocks_free to badblocks_exit
libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
libnvdimm: Add a poison list and export badblocks
nfit_test: Enable DSMs for all test NFITs
md: convert to use the generic badblocks code
block: Add badblock management for gendisks
badblocks: Add core badblock management code
block: fix del_gendisk() vs blkdev_ioctl crash
block: enable dax for raw block devices
block: introduce bdev_file_inode()
restrict /dev/mem to idle io memory ranges
arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
...
Diffstat (limited to 'fs/block_dev.c')
| -rw-r--r-- | fs/block_dev.c | 122 |
1 files changed, 107 insertions, 15 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 01b8e0d4b4ff..d878e4860fb7 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock, | |||
| 156 | return 0; | 156 | return 0; |
| 157 | } | 157 | } |
| 158 | 158 | ||
| 159 | static struct inode *bdev_file_inode(struct file *file) | ||
| 160 | { | ||
| 161 | return file->f_mapping->host; | ||
| 162 | } | ||
| 163 | |||
| 159 | static ssize_t | 164 | static ssize_t |
| 160 | blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) | 165 | blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) |
| 161 | { | 166 | { |
| 162 | struct file *file = iocb->ki_filp; | 167 | struct file *file = iocb->ki_filp; |
| 163 | struct inode *inode = file->f_mapping->host; | 168 | struct inode *inode = bdev_file_inode(file); |
| 164 | 169 | ||
| 165 | if (IS_DAX(inode)) | 170 | if (IS_DAX(inode)) |
| 166 | return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, | 171 | return dax_do_io(iocb, inode, iter, offset, blkdev_get_block, |
| @@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping, | |||
| 338 | */ | 343 | */ |
| 339 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) | 344 | static loff_t block_llseek(struct file *file, loff_t offset, int whence) |
| 340 | { | 345 | { |
| 341 | struct inode *bd_inode = file->f_mapping->host; | 346 | struct inode *bd_inode = bdev_file_inode(file); |
| 342 | loff_t retval; | 347 | loff_t retval; |
| 343 | 348 | ||
| 344 | mutex_lock(&bd_inode->i_mutex); | 349 | mutex_lock(&bd_inode->i_mutex); |
| @@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence) | |||
| 349 | 354 | ||
| 350 | int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) | 355 | int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) |
| 351 | { | 356 | { |
| 352 | struct inode *bd_inode = filp->f_mapping->host; | 357 | struct inode *bd_inode = bdev_file_inode(filp); |
| 353 | struct block_device *bdev = I_BDEV(bd_inode); | 358 | struct block_device *bdev = I_BDEV(bd_inode); |
| 354 | int error; | 359 | int error; |
| 355 | 360 | ||
| @@ -1224,8 +1229,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
| 1224 | } | 1229 | } |
| 1225 | } | 1230 | } |
| 1226 | 1231 | ||
| 1227 | if (!ret) | 1232 | if (!ret) { |
| 1228 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1233 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
| 1234 | if (!blkdev_dax_capable(bdev)) | ||
| 1235 | bdev->bd_inode->i_flags &= ~S_DAX; | ||
| 1236 | } | ||
| 1229 | 1237 | ||
| 1230 | /* | 1238 | /* |
| 1231 | * If the device is invalidated, rescan partition | 1239 | * If the device is invalidated, rescan partition |
| @@ -1239,6 +1247,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
| 1239 | else if (ret == -ENOMEDIUM) | 1247 | else if (ret == -ENOMEDIUM) |
| 1240 | invalidate_partitions(disk, bdev); | 1248 | invalidate_partitions(disk, bdev); |
| 1241 | } | 1249 | } |
| 1250 | |||
| 1242 | if (ret) | 1251 | if (ret) |
| 1243 | goto out_clear; | 1252 | goto out_clear; |
| 1244 | } else { | 1253 | } else { |
| @@ -1259,12 +1268,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
| 1259 | goto out_clear; | 1268 | goto out_clear; |
| 1260 | } | 1269 | } |
| 1261 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); | 1270 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
| 1262 | /* | 1271 | if (!blkdev_dax_capable(bdev)) |
| 1263 | * If the partition is not aligned on a page | ||
| 1264 | * boundary, we can't do dax I/O to it. | ||
| 1265 | */ | ||
| 1266 | if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) || | ||
| 1267 | (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) | ||
| 1268 | bdev->bd_inode->i_flags &= ~S_DAX; | 1272 | bdev->bd_inode->i_flags &= ~S_DAX; |
| 1269 | } | 1273 | } |
| 1270 | } else { | 1274 | } else { |
| @@ -1599,14 +1603,14 @@ EXPORT_SYMBOL(blkdev_put); | |||
| 1599 | 1603 | ||
| 1600 | static int blkdev_close(struct inode * inode, struct file * filp) | 1604 | static int blkdev_close(struct inode * inode, struct file * filp) |
| 1601 | { | 1605 | { |
| 1602 | struct block_device *bdev = I_BDEV(filp->f_mapping->host); | 1606 | struct block_device *bdev = I_BDEV(bdev_file_inode(filp)); |
| 1603 | blkdev_put(bdev, filp->f_mode); | 1607 | blkdev_put(bdev, filp->f_mode); |
| 1604 | return 0; | 1608 | return 0; |
| 1605 | } | 1609 | } |
| 1606 | 1610 | ||
| 1607 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | 1611 | static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) |
| 1608 | { | 1612 | { |
| 1609 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | 1613 | struct block_device *bdev = I_BDEV(bdev_file_inode(file)); |
| 1610 | fmode_t mode = file->f_mode; | 1614 | fmode_t mode = file->f_mode; |
| 1611 | 1615 | ||
| 1612 | /* | 1616 | /* |
| @@ -1631,7 +1635,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 1631 | ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) | 1635 | ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) |
| 1632 | { | 1636 | { |
| 1633 | struct file *file = iocb->ki_filp; | 1637 | struct file *file = iocb->ki_filp; |
| 1634 | struct inode *bd_inode = file->f_mapping->host; | 1638 | struct inode *bd_inode = bdev_file_inode(file); |
| 1635 | loff_t size = i_size_read(bd_inode); | 1639 | loff_t size = i_size_read(bd_inode); |
| 1636 | struct blk_plug plug; | 1640 | struct blk_plug plug; |
| 1637 | ssize_t ret; | 1641 | ssize_t ret; |
| @@ -1663,7 +1667,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter); | |||
| 1663 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) | 1667 | ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) |
| 1664 | { | 1668 | { |
| 1665 | struct file *file = iocb->ki_filp; | 1669 | struct file *file = iocb->ki_filp; |
| 1666 | struct inode *bd_inode = file->f_mapping->host; | 1670 | struct inode *bd_inode = bdev_file_inode(file); |
| 1667 | loff_t size = i_size_read(bd_inode); | 1671 | loff_t size = i_size_read(bd_inode); |
| 1668 | loff_t pos = iocb->ki_pos; | 1672 | loff_t pos = iocb->ki_pos; |
| 1669 | 1673 | ||
| @@ -1702,13 +1706,101 @@ static const struct address_space_operations def_blk_aops = { | |||
| 1702 | .is_dirty_writeback = buffer_check_dirty_writeback, | 1706 | .is_dirty_writeback = buffer_check_dirty_writeback, |
| 1703 | }; | 1707 | }; |
| 1704 | 1708 | ||
| 1709 | #ifdef CONFIG_FS_DAX | ||
| 1710 | /* | ||
| 1711 | * In the raw block case we do not need to contend with truncation nor | ||
| 1712 | * unwritten file extents. Without those concerns there is no need for | ||
| 1713 | * additional locking beyond the mmap_sem context that these routines | ||
| 1714 | * are already executing under. | ||
| 1715 | * | ||
| 1716 | * Note, there is no protection if the block device is dynamically | ||
| 1717 | * resized (partition grow/shrink) during a fault. A stable block device | ||
| 1718 | * size is already not enforced in the blkdev_direct_IO path. | ||
| 1719 | * | ||
| 1720 | * For DAX, it is the responsibility of the block device driver to | ||
| 1721 | * ensure the whole-disk device size is stable while requests are in | ||
| 1722 | * flight. | ||
| 1723 | * | ||
| 1724 | * Finally, unlike the filemap_page_mkwrite() case there is no | ||
| 1725 | * filesystem superblock to sync against freezing. We still include a | ||
| 1726 | * pfn_mkwrite callback for dax drivers to receive write fault | ||
| 1727 | * notifications. | ||
| 1728 | */ | ||
| 1729 | static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
| 1730 | { | ||
| 1731 | return __dax_fault(vma, vmf, blkdev_get_block, NULL); | ||
| 1732 | } | ||
| 1733 | |||
| 1734 | static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | ||
| 1735 | pmd_t *pmd, unsigned int flags) | ||
| 1736 | { | ||
| 1737 | return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); | ||
| 1738 | } | ||
| 1739 | |||
| 1740 | static void blkdev_vm_open(struct vm_area_struct *vma) | ||
| 1741 | { | ||
| 1742 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | ||
| 1743 | struct block_device *bdev = I_BDEV(bd_inode); | ||
| 1744 | |||
| 1745 | mutex_lock(&bd_inode->i_mutex); | ||
| 1746 | bdev->bd_map_count++; | ||
| 1747 | mutex_unlock(&bd_inode->i_mutex); | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | static void blkdev_vm_close(struct vm_area_struct *vma) | ||
| 1751 | { | ||
| 1752 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | ||
| 1753 | struct block_device *bdev = I_BDEV(bd_inode); | ||
| 1754 | |||
| 1755 | mutex_lock(&bd_inode->i_mutex); | ||
| 1756 | bdev->bd_map_count--; | ||
| 1757 | mutex_unlock(&bd_inode->i_mutex); | ||
| 1758 | } | ||
| 1759 | |||
| 1760 | static const struct vm_operations_struct blkdev_dax_vm_ops = { | ||
| 1761 | .open = blkdev_vm_open, | ||
| 1762 | .close = blkdev_vm_close, | ||
| 1763 | .fault = blkdev_dax_fault, | ||
| 1764 | .pmd_fault = blkdev_dax_pmd_fault, | ||
| 1765 | .pfn_mkwrite = blkdev_dax_fault, | ||
| 1766 | }; | ||
| 1767 | |||
| 1768 | static const struct vm_operations_struct blkdev_default_vm_ops = { | ||
| 1769 | .open = blkdev_vm_open, | ||
| 1770 | .close = blkdev_vm_close, | ||
| 1771 | .fault = filemap_fault, | ||
| 1772 | .map_pages = filemap_map_pages, | ||
| 1773 | }; | ||
| 1774 | |||
| 1775 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 1776 | { | ||
| 1777 | struct inode *bd_inode = bdev_file_inode(file); | ||
| 1778 | struct block_device *bdev = I_BDEV(bd_inode); | ||
| 1779 | |||
| 1780 | file_accessed(file); | ||
| 1781 | mutex_lock(&bd_inode->i_mutex); | ||
| 1782 | bdev->bd_map_count++; | ||
| 1783 | if (IS_DAX(bd_inode)) { | ||
| 1784 | vma->vm_ops = &blkdev_dax_vm_ops; | ||
| 1785 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | ||
| 1786 | } else { | ||
| 1787 | vma->vm_ops = &blkdev_default_vm_ops; | ||
| 1788 | } | ||
| 1789 | mutex_unlock(&bd_inode->i_mutex); | ||
| 1790 | |||
| 1791 | return 0; | ||
| 1792 | } | ||
| 1793 | #else | ||
| 1794 | #define blkdev_mmap generic_file_mmap | ||
| 1795 | #endif | ||
| 1796 | |||
| 1705 | const struct file_operations def_blk_fops = { | 1797 | const struct file_operations def_blk_fops = { |
| 1706 | .open = blkdev_open, | 1798 | .open = blkdev_open, |
| 1707 | .release = blkdev_close, | 1799 | .release = blkdev_close, |
| 1708 | .llseek = block_llseek, | 1800 | .llseek = block_llseek, |
| 1709 | .read_iter = blkdev_read_iter, | 1801 | .read_iter = blkdev_read_iter, |
| 1710 | .write_iter = blkdev_write_iter, | 1802 | .write_iter = blkdev_write_iter, |
| 1711 | .mmap = generic_file_mmap, | 1803 | .mmap = blkdev_mmap, |
| 1712 | .fsync = blkdev_fsync, | 1804 | .fsync = blkdev_fsync, |
| 1713 | .unlocked_ioctl = block_ioctl, | 1805 | .unlocked_ioctl = block_ioctl, |
| 1714 | #ifdef CONFIG_COMPAT | 1806 | #ifdef CONFIG_COMPAT |
