author		Linus Torvalds <torvalds@linux-foundation.org>	2016-01-13 22:15:14 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-01-13 22:15:14 -0500
commit		d080827f850ba4df5b955d5ca8c8c0fc92fe18c0 (patch)
tree		37262315200bbbe50bdd64ce3011951a92855159 /fs/block_dev.c
parent		cbd88cd4c07f9361914ab7fd7e21c9227986fe68 (diff)
parent		8b63b6bfc1a551acf154061699028c7032d7890c (diff)
Merge tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"The bulk of this has appeared in -next and independently received a
build success notification from the kbuild robot. The 'for-4.5/block-dax'
topic branch was rebased over the weekend to drop the "block
device end-of-life" rework that Al would like to see re-implemented
with a notifier, and to address bug reports against the badblocks
integration.
There is pending feedback against "libnvdimm: Add a poison list and
export badblocks" received last week. Linda identified some localized
fixups that we will handle incrementally.
Summary:
- Media error handling: The 'badblocks' implementation that
  originated in md-raid is up-levelled to a generic capability of a
  block device. This initial implementation is limited to being
  consulted in the pmem block-i/o path. Later, 'badblocks' will be
  consulted when creating dax mappings (see the badblocks_check()
  sketch after this summary).

- Raw block device dax: For virtualization and other cases that want
  large contiguous mappings of persistent memory, add the capability
  to dax-mmap a block device directly (a userspace usage sketch
  appears after the diff below).

- Increased /dev/mem restrictions: Add an option to treat all
  io-memory as IORESOURCE_EXCLUSIVE, i.e. disable /dev/mem access
  while a driver is actively using an address range. This behavior
  is controlled via the new CONFIG_IO_STRICT_DEVMEM option and can be
  overridden by the existing "iomem=relaxed" kernel command line
  option.

- Miscellaneous fixes include a 'pfn'-device huge page alignment fix,
  a block device shutdown crash fix, and other small libnvdimm fixes"
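
As a concrete illustration of the up-levelled interface, here is a
minimal sketch of how a driver's I/O path might consult the generic
badblocks list attached to a gendisk. The function example_check_sector()
and its surrounding driver context are hypothetical; only badblocks_check()
and the gendisk 'bb' member come from the badblocks work merged here.

	#include <linux/badblocks.h>
	#include <linux/genhd.h>

	/*
	 * Fail I/O that intersects a known-bad range, in the spirit of
	 * "pmem: fail io-requests to known bad blocks" below.
	 */
	static int example_check_sector(struct gendisk *disk, sector_t sector,
					unsigned int nr_sectors)
	{
		sector_t first_bad;
		int num_bad;

		if (disk->bb &&
		    badblocks_check(disk->bb, sector, nr_sectors,
				    &first_bad, &num_bad) > 0)
			return -EIO;	/* media error recorded in this range */

		return 0;		/* range is clean as far as we know */
	}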
* tag 'libnvdimm-for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (32 commits)
block: kill disk_{check|set|clear|alloc}_badblocks
libnvdimm, pmem: nvdimm_read_bytes() badblocks support
pmem, dax: disable dax in the presence of bad blocks
pmem: fail io-requests to known bad blocks
libnvdimm: convert to statically allocated badblocks
libnvdimm: don't fail init for full badblocks list
block, badblocks: introduce devm_init_badblocks
block: clarify badblocks lifetime
badblocks: rename badblocks_free to badblocks_exit
libnvdimm, pmem: move definition of nvdimm_namespace_add_poison to nd.h
libnvdimm: Add a poison list and export badblocks
nfit_test: Enable DSMs for all test NFITs
md: convert to use the generic badblocks code
block: Add badblock management for gendisks
badblocks: Add core badblock management code
block: fix del_gendisk() vs blkdev_ioctl crash
block: enable dax for raw block devices
block: introduce bdev_file_inode()
restrict /dev/mem to idle io memory ranges
arch: consolidate CONFIG_STRICT_DEVM in lib/Kconfig.debug
...
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--	fs/block_dev.c	122
1 file changed, 107 insertions(+), 15 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 01b8e0d4b4ff..d878e4860fb7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -156,11 +156,16 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
+static struct inode *bdev_file_inode(struct file *file)
+{
+	return file->f_mapping->host;
+}
+
 static ssize_t
 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = bdev_file_inode(file);
 
 	if (IS_DAX(inode))
 		return dax_do_io(iocb, inode, iter, offset, blkdev_get_block,
@@ -338,7 +343,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
  */
 static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 {
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t retval;
 
 	mutex_lock(&bd_inode->i_mutex);
@@ -349,7 +354,7 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
 
 int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 {
-	struct inode *bd_inode = filp->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(filp);
 	struct block_device *bdev = I_BDEV(bd_inode);
 	int error;
 
@@ -1224,8 +1229,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			}
 		}
 
-		if (!ret)
+		if (!ret) {
 			bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+			if (!blkdev_dax_capable(bdev))
+				bdev->bd_inode->i_flags &= ~S_DAX;
+		}
 
 		/*
 		 * If the device is invalidated, rescan partition
@@ -1239,6 +1247,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			else if (ret == -ENOMEDIUM)
 				invalidate_partitions(disk, bdev);
 		}
+
 		if (ret)
 			goto out_clear;
 	} else {
@@ -1259,12 +1268,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				goto out_clear;
 			}
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
-			/*
-			 * If the partition is not aligned on a page
-			 * boundary, we can't do dax I/O to it.
-			 */
-			if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
-			    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+			if (!blkdev_dax_capable(bdev))
 				bdev->bd_inode->i_flags &= ~S_DAX;
 		}
 	} else {
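
(For reference: blkdev_dax_capable() is added elsewhere in this merge.
A sketch of its core test, reconstructed from the inline check removed
above, is shown here; the real helper may carry additional whole-device
checks, so treat this as an approximation rather than the verbatim code.

	static bool blkdev_dax_capable(struct block_device *bdev)
	{
		/* a partition must be page-aligned at both ends for DAX */
		if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
		    (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
			return false;
		return true;
	}

This keeps the alignment policy in one place instead of duplicating the
modulo arithmetic at each call site.)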
@@ -1599,14 +1603,14 @@ EXPORT_SYMBOL(blkdev_put);
 
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
-	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
 	blkdev_put(bdev, filp->f_mode);
 	return 0;
 }
 
 static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 {
-	struct block_device *bdev = I_BDEV(file->f_mapping->host);
+	struct block_device *bdev = I_BDEV(bdev_file_inode(file));
 	fmode_t mode = file->f_mode;
 
 	/*
@@ -1631,7 +1635,7 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	struct blk_plug plug;
 	ssize_t ret;
@@ -1663,7 +1667,7 @@ EXPORT_SYMBOL_GPL(blkdev_write_iter);
 ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *bd_inode = file->f_mapping->host;
+	struct inode *bd_inode = bdev_file_inode(file);
 	loff_t size = i_size_read(bd_inode);
 	loff_t pos = iocb->ki_pos;
 
@@ -1702,13 +1706,101 @@ static const struct address_space_operations def_blk_aops = {
 	.is_dirty_writeback = buffer_check_dirty_writeback,
 };
 
+#ifdef CONFIG_FS_DAX
+/*
+ * In the raw block case we do not need to contend with truncation nor
+ * unwritten file extents. Without those concerns there is no need for
+ * additional locking beyond the mmap_sem context that these routines
+ * are already executing under.
+ *
+ * Note, there is no protection if the block device is dynamically
+ * resized (partition grow/shrink) during a fault. A stable block device
+ * size is already not enforced in the blkdev_direct_IO path.
+ *
+ * For DAX, it is the responsibility of the block device driver to
+ * ensure the whole-disk device size is stable while requests are in
+ * flight.
+ *
+ * Finally, unlike the filemap_page_mkwrite() case there is no
+ * filesystem superblock to sync against freezing. We still include a
+ * pfn_mkwrite callback for dax drivers to receive write fault
+ * notifications.
+ */
+static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
+}
+
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
+{
+	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
+}
+
+static void blkdev_vm_open(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static void blkdev_vm_close(struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count--;
+	mutex_unlock(&bd_inode->i_mutex);
+}
+
+static const struct vm_operations_struct blkdev_dax_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= blkdev_dax_fault,
+	.pmd_fault	= blkdev_dax_pmd_fault,
+	.pfn_mkwrite	= blkdev_dax_fault,
+};
+
+static const struct vm_operations_struct blkdev_default_vm_ops = {
+	.open		= blkdev_vm_open,
+	.close		= blkdev_vm_close,
+	.fault		= filemap_fault,
+	.map_pages	= filemap_map_pages,
+};
+
+static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct inode *bd_inode = bdev_file_inode(file);
+	struct block_device *bdev = I_BDEV(bd_inode);
+
+	file_accessed(file);
+	mutex_lock(&bd_inode->i_mutex);
+	bdev->bd_map_count++;
+	if (IS_DAX(bd_inode)) {
+		vma->vm_ops = &blkdev_dax_vm_ops;
+		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
+	} else {
+		vma->vm_ops = &blkdev_default_vm_ops;
+	}
+	mutex_unlock(&bd_inode->i_mutex);
+
+	return 0;
+}
+#else
+#define blkdev_mmap generic_file_mmap
+#endif
+
 const struct file_operations def_blk_fops = {
 	.open		= blkdev_open,
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.mmap		= generic_file_mmap,
+	.mmap		= blkdev_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
 #ifdef CONFIG_COMPAT
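
With .mmap now routed through blkdev_mmap(), a process can map a
DAX-capable raw block device directly. A minimal userspace sketch,
assuming a pmem device at the hypothetical path /dev/pmem0:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/dev/pmem0", O_RDWR);	/* hypothetical device */
		size_t len = 2UL << 20;			/* 2 MiB mapping */
		void *p;

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* With IS_DAX set, faults map persistent memory directly. */
		p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			close(fd);
			return 1;
		}

		memcpy(p, "hello, pmem", 12);	/* stores target the media */

		munmap(p, len);
		close(fd);
		return 0;
	}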