diff options
author | Dan Williams <dan.j.williams@intel.com> | 2016-05-07 14:40:28 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2016-05-21 01:02:56 -0400 |
commit | acc93d30d7d43f428272c20a047389c4cbca82ba (patch) | |
tree | e13ee3ce79c671c3a148818631e40c1c320dbed2 | |
parent | dee410792419aaa8bc3e3b35d2ccb6515835916d (diff) |
Revert "block: enable dax for raw block devices"
This reverts commit 5a023cdba50c5f5f2bc351783b3131699deb3937.
The functionality is superseded by the new "Device DAX" facility.
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Jan Kara <jack@suse.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- | block/ioctl.c | 32 | ||||
-rw-r--r-- | fs/block_dev.c | 96 | ||||
-rw-r--r-- | include/linux/fs.h | 8 | ||||
-rw-r--r-- | include/uapi/linux/fs.h | 1 |
4 files changed, 29 insertions, 108 deletions
diff --git a/block/ioctl.c b/block/ioctl.c index 4ff1f92f89ca..698c7933d582 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
@@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret) | |||
407 | ret == -ENOIOCTLCMD; | 407 | ret == -ENOIOCTLCMD; |
408 | } | 408 | } |
409 | 409 | ||
410 | #ifdef CONFIG_FS_DAX | ||
411 | bool blkdev_dax_capable(struct block_device *bdev) | ||
412 | { | ||
413 | struct gendisk *disk = bdev->bd_disk; | ||
414 | |||
415 | if (!disk->fops->direct_access) | ||
416 | return false; | ||
417 | |||
418 | /* | ||
419 | * If the partition is not aligned on a page boundary, we can't | ||
420 | * do dax I/O to it. | ||
421 | */ | ||
422 | if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) | ||
423 | || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) | ||
424 | return false; | ||
425 | |||
426 | /* | ||
427 | * If the device has known bad blocks, force all I/O through the | ||
428 | * driver / page cache. | ||
429 | * | ||
430 | * TODO: support finer grained dax error handling | ||
431 | */ | ||
432 | if (disk->bb && disk->bb->count) | ||
433 | return false; | ||
434 | |||
435 | return true; | ||
436 | } | ||
437 | #endif | ||
438 | |||
439 | static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, | 410 | static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, |
440 | unsigned cmd, unsigned long arg) | 411 | unsigned cmd, unsigned long arg) |
441 | { | 412 | { |
@@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
598 | case BLKTRACESETUP: | 569 | case BLKTRACESETUP: |
599 | case BLKTRACETEARDOWN: | 570 | case BLKTRACETEARDOWN: |
600 | return blk_trace_ioctl(bdev, cmd, argp); | 571 | return blk_trace_ioctl(bdev, cmd, argp); |
601 | case BLKDAXGET: | ||
602 | return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX)); | ||
603 | break; | ||
604 | case IOC_PR_REGISTER: | 572 | case IOC_PR_REGISTER: |
605 | return blkdev_pr_register(bdev, argp); | 573 | return blkdev_pr_register(bdev, argp); |
606 | case IOC_PR_RESERVE: | 574 | case IOC_PR_RESERVE: |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..36ee10ca503e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/log2.h> | 29 | #include <linux/log2.h> |
30 | #include <linux/cleancache.h> | 30 | #include <linux/cleancache.h> |
31 | #include <linux/dax.h> | 31 | #include <linux/dax.h> |
32 | #include <linux/badblocks.h> | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include "internal.h" | 34 | #include "internal.h" |
34 | 35 | ||
@@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size) | |||
1159 | } | 1160 | } |
1160 | EXPORT_SYMBOL(bd_set_size); | 1161 | EXPORT_SYMBOL(bd_set_size); |
1161 | 1162 | ||
1163 | static bool blkdev_dax_capable(struct block_device *bdev) | ||
1164 | { | ||
1165 | struct gendisk *disk = bdev->bd_disk; | ||
1166 | |||
1167 | if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX)) | ||
1168 | return false; | ||
1169 | |||
1170 | /* | ||
1171 | * If the partition is not aligned on a page boundary, we can't | ||
1172 | * do dax I/O to it. | ||
1173 | */ | ||
1174 | if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) | ||
1175 | || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) | ||
1176 | return false; | ||
1177 | |||
1178 | /* | ||
1179 | * If the device has known bad blocks, force all I/O through the | ||
1180 | * driver / page cache. | ||
1181 | * | ||
1182 | * TODO: support finer grained dax error handling | ||
1183 | */ | ||
1184 | if (disk->bb && disk->bb->count) | ||
1185 | return false; | ||
1186 | |||
1187 | return true; | ||
1188 | } | ||
1189 | |||
1162 | static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); | 1190 | static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); |
1163 | 1191 | ||
1164 | /* | 1192 | /* |
@@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = { | |||
1724 | .is_dirty_writeback = buffer_check_dirty_writeback, | 1752 | .is_dirty_writeback = buffer_check_dirty_writeback, |
1725 | }; | 1753 | }; |
1726 | 1754 | ||
1727 | #ifdef CONFIG_FS_DAX | ||
1728 | /* | ||
1729 | * In the raw block case we do not need to contend with truncation nor | ||
1730 | * unwritten file extents. Without those concerns there is no need for | ||
1731 | * additional locking beyond the mmap_sem context that these routines | ||
1732 | * are already executing under. | ||
1733 | * | ||
1734 | * Note, there is no protection if the block device is dynamically | ||
1735 | * resized (partition grow/shrink) during a fault. A stable block device | ||
1736 | * size is already not enforced in the blkdev_direct_IO path. | ||
1737 | * | ||
1738 | * For DAX, it is the responsibility of the block device driver to | ||
1739 | * ensure the whole-disk device size is stable while requests are in | ||
1740 | * flight. | ||
1741 | * | ||
1742 | * Finally, unlike the filemap_page_mkwrite() case there is no | ||
1743 | * filesystem superblock to sync against freezing. We still include a | ||
1744 | * pfn_mkwrite callback for dax drivers to receive write fault | ||
1745 | * notifications. | ||
1746 | */ | ||
1747 | static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1748 | { | ||
1749 | return __dax_fault(vma, vmf, blkdev_get_block, NULL); | ||
1750 | } | ||
1751 | |||
1752 | static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma, | ||
1753 | struct vm_fault *vmf) | ||
1754 | { | ||
1755 | return dax_pfn_mkwrite(vma, vmf); | ||
1756 | } | ||
1757 | |||
1758 | static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | ||
1759 | pmd_t *pmd, unsigned int flags) | ||
1760 | { | ||
1761 | return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); | ||
1762 | } | ||
1763 | |||
1764 | static const struct vm_operations_struct blkdev_dax_vm_ops = { | ||
1765 | .fault = blkdev_dax_fault, | ||
1766 | .pmd_fault = blkdev_dax_pmd_fault, | ||
1767 | .pfn_mkwrite = blkdev_dax_pfn_mkwrite, | ||
1768 | }; | ||
1769 | |||
1770 | static const struct vm_operations_struct blkdev_default_vm_ops = { | ||
1771 | .fault = filemap_fault, | ||
1772 | .map_pages = filemap_map_pages, | ||
1773 | }; | ||
1774 | |||
1775 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
1776 | { | ||
1777 | struct inode *bd_inode = bdev_file_inode(file); | ||
1778 | |||
1779 | file_accessed(file); | ||
1780 | if (IS_DAX(bd_inode)) { | ||
1781 | vma->vm_ops = &blkdev_dax_vm_ops; | ||
1782 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | ||
1783 | } else { | ||
1784 | vma->vm_ops = &blkdev_default_vm_ops; | ||
1785 | } | ||
1786 | |||
1787 | return 0; | ||
1788 | } | ||
1789 | #else | ||
1790 | #define blkdev_mmap generic_file_mmap | ||
1791 | #endif | ||
1792 | |||
1793 | const struct file_operations def_blk_fops = { | 1755 | const struct file_operations def_blk_fops = { |
1794 | .open = blkdev_open, | 1756 | .open = blkdev_open, |
1795 | .release = blkdev_close, | 1757 | .release = blkdev_close, |
1796 | .llseek = block_llseek, | 1758 | .llseek = block_llseek, |
1797 | .read_iter = blkdev_read_iter, | 1759 | .read_iter = blkdev_read_iter, |
1798 | .write_iter = blkdev_write_iter, | 1760 | .write_iter = blkdev_write_iter, |
1799 | .mmap = blkdev_mmap, | 1761 | .mmap = generic_file_mmap, |
1800 | .fsync = blkdev_fsync, | 1762 | .fsync = blkdev_fsync, |
1801 | .unlocked_ioctl = block_ioctl, | 1763 | .unlocked_ioctl = block_ioctl, |
1802 | #ifdef CONFIG_COMPAT | 1764 | #ifdef CONFIG_COMPAT |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..8363a10660f6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *); | |||
2320 | extern void emergency_thaw_all(void); | 2320 | extern void emergency_thaw_all(void); |
2321 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); | 2321 | extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); |
2322 | extern int fsync_bdev(struct block_device *); | 2322 | extern int fsync_bdev(struct block_device *); |
2323 | #ifdef CONFIG_FS_DAX | ||
2324 | extern bool blkdev_dax_capable(struct block_device *bdev); | ||
2325 | #else | ||
2326 | static inline bool blkdev_dax_capable(struct block_device *bdev) | ||
2327 | { | ||
2328 | return false; | ||
2329 | } | ||
2330 | #endif | ||
2331 | 2323 | ||
2332 | extern struct super_block *blockdev_superblock; | 2324 | extern struct super_block *blockdev_superblock; |
2333 | 2325 | ||
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a079d50376e1..fbff8b28aa35 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
@@ -222,7 +222,6 @@ struct fsxattr { | |||
222 | #define BLKSECDISCARD _IO(0x12,125) | 222 | #define BLKSECDISCARD _IO(0x12,125) |
223 | #define BLKROTATIONAL _IO(0x12,126) | 223 | #define BLKROTATIONAL _IO(0x12,126) |
224 | #define BLKZEROOUT _IO(0x12,127) | 224 | #define BLKZEROOUT _IO(0x12,127) |
225 | #define BLKDAXGET _IO(0x12,129) | ||
226 | 225 | ||
227 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 226 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
228 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 227 | #define FIBMAP _IO(0x00,1) /* bmap access */ |