diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-26 22:34:26 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-26 22:34:26 -0400 |
commit | 315227f6da389f3a560f27f7777080857278e1b4 (patch) | |
tree | 11306e1e8d8b66044ab48901b90141b5362c12e3 /fs | |
parent | a10c38a4f385f5d7c173a263ff6bb2d36021b3bb (diff) | |
parent | 40543f62cbdce42633e3fe10923099feee272e1f (diff) |
Merge tag 'dax-misc-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull misc DAX updates from Vishal Verma:
"DAX error handling for 4.7
- Until now, dax has been disabled if media errors were found on any
device. This enables the use of DAX in the presence of these
errors by making all sector-aligned zeroing go through the driver.
- The driver (already) has the ability to clear errors on writes that
are sent through the block layer using 'DSMs' defined in ACPI 6.1.
Other misc changes:
- When mounting DAX filesystems, check to make sure the partition is
page aligned. This is a requirement for DAX, and previously, we
allowed such unaligned mounts to succeed, but subsequent
reads/writes would fail.
- Misc/cleanup fixes from Jan that remove unused code from DAX
related to zeroing, writeback, and some size checks"
* tag 'dax-misc-for-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm:
dax: fix a comment in dax_zero_page_range and dax_truncate_page
dax: for truncate/hole-punch, do zeroing through the driver if possible
dax: export a low-level __dax_zero_page_range helper
dax: use sb_issue_zeroout instead of calling dax_clear_sectors
dax: enable dax in the presence of known media errors (badblocks)
dax: fallback from pmd to pte on error
block: Update blkdev_dax_capable() for consistency
xfs: Add alignment check for DAX mount
ext2: Add alignment check for DAX mount
ext4: Add alignment check for DAX mount
block: Add bdev_dax_supported() for dax mount checks
block: Add vfs_msg() interface
dax: Remove redundant inode size checks
dax: Remove pointless writeback from dax_do_io()
dax: Remove zeroing from dax_io()
dax: Remove dead zeroing code from fault handlers
ext2: Avoid DAX zeroing to corrupt data
ext2: Fix block zeroing in ext2_get_blocks() for DAX
dax: Remove complete_unwritten argument
DAX: move RADIX_DAX_ definitions to dax.c
Diffstat (limited to 'fs')
-rw-r--r-- | fs/block_dev.c | 114 | ||||
-rw-r--r-- | fs/dax.c | 257 | ||||
-rw-r--r-- | fs/ext2/file.c | 4 | ||||
-rw-r--r-- | fs/ext2/inode.c | 12 | ||||
-rw-r--r-- | fs/ext2/super.c | 11 | ||||
-rw-r--r-- | fs/ext4/file.c | 4 | ||||
-rw-r--r-- | fs/ext4/super.c | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 12 |
10 files changed, 173 insertions, 274 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1089dbf25925..71ccab1d22c6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -51,6 +51,18 @@ struct block_device *I_BDEV(struct inode *inode) | |||
51 | } | 51 | } |
52 | EXPORT_SYMBOL(I_BDEV); | 52 | EXPORT_SYMBOL(I_BDEV); |
53 | 53 | ||
54 | void __vfs_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) | ||
55 | { | ||
56 | struct va_format vaf; | ||
57 | va_list args; | ||
58 | |||
59 | va_start(args, fmt); | ||
60 | vaf.fmt = fmt; | ||
61 | vaf.va = &args; | ||
62 | printk_ratelimited("%sVFS (%s): %pV\n", prefix, sb->s_id, &vaf); | ||
63 | va_end(args); | ||
64 | } | ||
65 | |||
54 | static void bdev_write_inode(struct block_device *bdev) | 66 | static void bdev_write_inode(struct block_device *bdev) |
55 | { | 67 | { |
56 | struct inode *inode = bdev->bd_inode; | 68 | struct inode *inode = bdev->bd_inode; |
@@ -489,7 +501,7 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) | |||
489 | sector += get_start_sect(bdev); | 501 | sector += get_start_sect(bdev); |
490 | if (sector % (PAGE_SIZE / 512)) | 502 | if (sector % (PAGE_SIZE / 512)) |
491 | return -EINVAL; | 503 | return -EINVAL; |
492 | avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn); | 504 | avail = ops->direct_access(bdev, sector, &dax->addr, &dax->pfn, size); |
493 | if (!avail) | 505 | if (!avail) |
494 | return -ERANGE; | 506 | return -ERANGE; |
495 | if (avail > 0 && avail & ~PAGE_MASK) | 507 | if (avail > 0 && avail & ~PAGE_MASK) |
@@ -498,6 +510,75 @@ long bdev_direct_access(struct block_device *bdev, struct blk_dax_ctl *dax) | |||
498 | } | 510 | } |
499 | EXPORT_SYMBOL_GPL(bdev_direct_access); | 511 | EXPORT_SYMBOL_GPL(bdev_direct_access); |
500 | 512 | ||
513 | /** | ||
514 | * bdev_dax_supported() - Check if the device supports dax for filesystem | ||
515 | * @sb: The superblock of the device | ||
516 | * @blocksize: The block size of the device | ||
517 | * | ||
518 | * This is a library function for filesystems to check if the block device | ||
519 | * can be mounted with dax option. | ||
520 | * | ||
521 | * Return: negative errno if unsupported, 0 if supported. | ||
522 | */ | ||
523 | int bdev_dax_supported(struct super_block *sb, int blocksize) | ||
524 | { | ||
525 | struct blk_dax_ctl dax = { | ||
526 | .sector = 0, | ||
527 | .size = PAGE_SIZE, | ||
528 | }; | ||
529 | int err; | ||
530 | |||
531 | if (blocksize != PAGE_SIZE) { | ||
532 | vfs_msg(sb, KERN_ERR, "error: unsupported blocksize for dax"); | ||
533 | return -EINVAL; | ||
534 | } | ||
535 | |||
536 | err = bdev_direct_access(sb->s_bdev, &dax); | ||
537 | if (err < 0) { | ||
538 | switch (err) { | ||
539 | case -EOPNOTSUPP: | ||
540 | vfs_msg(sb, KERN_ERR, | ||
541 | "error: device does not support dax"); | ||
542 | break; | ||
543 | case -EINVAL: | ||
544 | vfs_msg(sb, KERN_ERR, | ||
545 | "error: unaligned partition for dax"); | ||
546 | break; | ||
547 | default: | ||
548 | vfs_msg(sb, KERN_ERR, | ||
549 | "error: dax access failed (%d)", err); | ||
550 | } | ||
551 | return err; | ||
552 | } | ||
553 | |||
554 | return 0; | ||
555 | } | ||
556 | EXPORT_SYMBOL_GPL(bdev_dax_supported); | ||
557 | |||
558 | /** | ||
559 | * bdev_dax_capable() - Return if the raw device is capable for dax | ||
560 | * @bdev: The device for raw block device access | ||
561 | */ | ||
562 | bool bdev_dax_capable(struct block_device *bdev) | ||
563 | { | ||
564 | struct blk_dax_ctl dax = { | ||
565 | .size = PAGE_SIZE, | ||
566 | }; | ||
567 | |||
568 | if (!IS_ENABLED(CONFIG_FS_DAX)) | ||
569 | return false; | ||
570 | |||
571 | dax.sector = 0; | ||
572 | if (bdev_direct_access(bdev, &dax) < 0) | ||
573 | return false; | ||
574 | |||
575 | dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512); | ||
576 | if (bdev_direct_access(bdev, &dax) < 0) | ||
577 | return false; | ||
578 | |||
579 | return true; | ||
580 | } | ||
581 | |||
501 | /* | 582 | /* |
502 | * pseudo-fs | 583 | * pseudo-fs |
503 | */ | 584 | */ |
@@ -1160,33 +1241,6 @@ void bd_set_size(struct block_device *bdev, loff_t size) | |||
1160 | } | 1241 | } |
1161 | EXPORT_SYMBOL(bd_set_size); | 1242 | EXPORT_SYMBOL(bd_set_size); |
1162 | 1243 | ||
1163 | static bool blkdev_dax_capable(struct block_device *bdev) | ||
1164 | { | ||
1165 | struct gendisk *disk = bdev->bd_disk; | ||
1166 | |||
1167 | if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX)) | ||
1168 | return false; | ||
1169 | |||
1170 | /* | ||
1171 | * If the partition is not aligned on a page boundary, we can't | ||
1172 | * do dax I/O to it. | ||
1173 | */ | ||
1174 | if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) | ||
1175 | || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) | ||
1176 | return false; | ||
1177 | |||
1178 | /* | ||
1179 | * If the device has known bad blocks, force all I/O through the | ||
1180 | * driver / page cache. | ||
1181 | * | ||
1182 | * TODO: support finer grained dax error handling | ||
1183 | */ | ||
1184 | if (disk->bb && disk->bb->count) | ||
1185 | return false; | ||
1186 | |||
1187 | return true; | ||
1188 | } | ||
1189 | |||
1190 | static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); | 1244 | static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); |
1191 | 1245 | ||
1192 | /* | 1246 | /* |
@@ -1266,7 +1320,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1266 | 1320 | ||
1267 | if (!ret) { | 1321 | if (!ret) { |
1268 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1322 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
1269 | if (!blkdev_dax_capable(bdev)) | 1323 | if (!bdev_dax_capable(bdev)) |
1270 | bdev->bd_inode->i_flags &= ~S_DAX; | 1324 | bdev->bd_inode->i_flags &= ~S_DAX; |
1271 | } | 1325 | } |
1272 | 1326 | ||
@@ -1303,7 +1357,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1303 | goto out_clear; | 1357 | goto out_clear; |
1304 | } | 1358 | } |
1305 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); | 1359 | bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9); |
1306 | if (!blkdev_dax_capable(bdev)) | 1360 | if (!bdev_dax_capable(bdev)) |
1307 | bdev->bd_inode->i_flags &= ~S_DAX; | 1361 | bdev->bd_inode->i_flags &= ~S_DAX; |
1308 | } | 1362 | } |
1309 | } else { | 1363 | } else { |
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -87,50 +87,6 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n) | |||
87 | return page; | 87 | return page; |
88 | } | 88 | } |
89 | 89 | ||
90 | /* | ||
91 | * dax_clear_sectors() is called from within transaction context from XFS, | ||
92 | * and hence this means the stack from this point must follow GFP_NOFS | ||
93 | * semantics for all operations. | ||
94 | */ | ||
95 | int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size) | ||
96 | { | ||
97 | struct blk_dax_ctl dax = { | ||
98 | .sector = _sector, | ||
99 | .size = _size, | ||
100 | }; | ||
101 | |||
102 | might_sleep(); | ||
103 | do { | ||
104 | long count, sz; | ||
105 | |||
106 | count = dax_map_atomic(bdev, &dax); | ||
107 | if (count < 0) | ||
108 | return count; | ||
109 | sz = min_t(long, count, SZ_128K); | ||
110 | clear_pmem(dax.addr, sz); | ||
111 | dax.size -= sz; | ||
112 | dax.sector += sz / 512; | ||
113 | dax_unmap_atomic(bdev, &dax); | ||
114 | cond_resched(); | ||
115 | } while (dax.size); | ||
116 | |||
117 | wmb_pmem(); | ||
118 | return 0; | ||
119 | } | ||
120 | EXPORT_SYMBOL_GPL(dax_clear_sectors); | ||
121 | |||
122 | /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ | ||
123 | static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, | ||
124 | loff_t pos, loff_t end) | ||
125 | { | ||
126 | loff_t final = end - pos + first; /* The final byte of the buffer */ | ||
127 | |||
128 | if (first > 0) | ||
129 | clear_pmem(addr, first); | ||
130 | if (final < size) | ||
131 | clear_pmem(addr + final, size - final); | ||
132 | } | ||
133 | |||
134 | static bool buffer_written(struct buffer_head *bh) | 90 | static bool buffer_written(struct buffer_head *bh) |
135 | { | 91 | { |
136 | return buffer_mapped(bh) && !buffer_unwritten(bh); | 92 | return buffer_mapped(bh) && !buffer_unwritten(bh); |
@@ -169,6 +125,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, | |||
169 | struct blk_dax_ctl dax = { | 125 | struct blk_dax_ctl dax = { |
170 | .addr = (void __pmem *) ERR_PTR(-EIO), | 126 | .addr = (void __pmem *) ERR_PTR(-EIO), |
171 | }; | 127 | }; |
128 | unsigned blkbits = inode->i_blkbits; | ||
129 | sector_t file_blks = (i_size_read(inode) + (1 << blkbits) - 1) | ||
130 | >> blkbits; | ||
172 | 131 | ||
173 | if (rw == READ) | 132 | if (rw == READ) |
174 | end = min(end, i_size_read(inode)); | 133 | end = min(end, i_size_read(inode)); |
@@ -176,7 +135,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, | |||
176 | while (pos < end) { | 135 | while (pos < end) { |
177 | size_t len; | 136 | size_t len; |
178 | if (pos == max) { | 137 | if (pos == max) { |
179 | unsigned blkbits = inode->i_blkbits; | ||
180 | long page = pos >> PAGE_SHIFT; | 138 | long page = pos >> PAGE_SHIFT; |
181 | sector_t block = page << (PAGE_SHIFT - blkbits); | 139 | sector_t block = page << (PAGE_SHIFT - blkbits); |
182 | unsigned first = pos - (block << blkbits); | 140 | unsigned first = pos - (block << blkbits); |
@@ -192,6 +150,13 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, | |||
192 | bh->b_size = 1 << blkbits; | 150 | bh->b_size = 1 << blkbits; |
193 | bh_max = pos - first + bh->b_size; | 151 | bh_max = pos - first + bh->b_size; |
194 | bdev = bh->b_bdev; | 152 | bdev = bh->b_bdev; |
153 | /* | ||
154 | * We allow uninitialized buffers for writes | ||
155 | * beyond EOF as those cannot race with faults | ||
156 | */ | ||
157 | WARN_ON_ONCE( | ||
158 | (buffer_new(bh) && block < file_blks) || | ||
159 | (rw == WRITE && buffer_unwritten(bh))); | ||
195 | } else { | 160 | } else { |
196 | unsigned done = bh->b_size - | 161 | unsigned done = bh->b_size - |
197 | (bh_max - (pos - first)); | 162 | (bh_max - (pos - first)); |
@@ -211,11 +176,6 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, | |||
211 | rc = map_len; | 176 | rc = map_len; |
212 | break; | 177 | break; |
213 | } | 178 | } |
214 | if (buffer_unwritten(bh) || buffer_new(bh)) { | ||
215 | dax_new_buf(dax.addr, map_len, first, | ||
216 | pos, end); | ||
217 | need_wmb = true; | ||
218 | } | ||
219 | dax.addr += first; | 179 | dax.addr += first; |
220 | size = map_len - first; | 180 | size = map_len - first; |
221 | } | 181 | } |
@@ -276,15 +236,8 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, | |||
276 | memset(&bh, 0, sizeof(bh)); | 236 | memset(&bh, 0, sizeof(bh)); |
277 | bh.b_bdev = inode->i_sb->s_bdev; | 237 | bh.b_bdev = inode->i_sb->s_bdev; |
278 | 238 | ||
279 | if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) { | 239 | if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) |
280 | struct address_space *mapping = inode->i_mapping; | ||
281 | inode_lock(inode); | 240 | inode_lock(inode); |
282 | retval = filemap_write_and_wait_range(mapping, pos, end - 1); | ||
283 | if (retval) { | ||
284 | inode_unlock(inode); | ||
285 | goto out; | ||
286 | } | ||
287 | } | ||
288 | 241 | ||
289 | /* Protects against truncate */ | 242 | /* Protects against truncate */ |
290 | if (!(flags & DIO_SKIP_DIO_COUNT)) | 243 | if (!(flags & DIO_SKIP_DIO_COUNT)) |
@@ -305,7 +258,6 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, | |||
305 | 258 | ||
306 | if (!(flags & DIO_SKIP_DIO_COUNT)) | 259 | if (!(flags & DIO_SKIP_DIO_COUNT)) |
307 | inode_dio_end(inode); | 260 | inode_dio_end(inode); |
308 | out: | ||
309 | return retval; | 261 | return retval; |
310 | } | 262 | } |
311 | EXPORT_SYMBOL_GPL(dax_do_io); | 263 | EXPORT_SYMBOL_GPL(dax_do_io); |
@@ -321,20 +273,11 @@ EXPORT_SYMBOL_GPL(dax_do_io); | |||
321 | static int dax_load_hole(struct address_space *mapping, struct page *page, | 273 | static int dax_load_hole(struct address_space *mapping, struct page *page, |
322 | struct vm_fault *vmf) | 274 | struct vm_fault *vmf) |
323 | { | 275 | { |
324 | unsigned long size; | ||
325 | struct inode *inode = mapping->host; | ||
326 | if (!page) | 276 | if (!page) |
327 | page = find_or_create_page(mapping, vmf->pgoff, | 277 | page = find_or_create_page(mapping, vmf->pgoff, |
328 | GFP_KERNEL | __GFP_ZERO); | 278 | GFP_KERNEL | __GFP_ZERO); |
329 | if (!page) | 279 | if (!page) |
330 | return VM_FAULT_OOM; | 280 | return VM_FAULT_OOM; |
331 | /* Recheck i_size under page lock to avoid truncate race */ | ||
332 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
333 | if (vmf->pgoff >= size) { | ||
334 | unlock_page(page); | ||
335 | put_page(page); | ||
336 | return VM_FAULT_SIGBUS; | ||
337 | } | ||
338 | 281 | ||
339 | vmf->page = page; | 282 | vmf->page = page; |
340 | return VM_FAULT_LOCKED; | 283 | return VM_FAULT_LOCKED; |
@@ -565,33 +508,14 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, | |||
565 | .sector = to_sector(bh, inode), | 508 | .sector = to_sector(bh, inode), |
566 | .size = bh->b_size, | 509 | .size = bh->b_size, |
567 | }; | 510 | }; |
568 | pgoff_t size; | ||
569 | int error; | 511 | int error; |
570 | 512 | ||
571 | i_mmap_lock_read(mapping); | 513 | i_mmap_lock_read(mapping); |
572 | 514 | ||
573 | /* | ||
574 | * Check truncate didn't happen while we were allocating a block. | ||
575 | * If it did, this block may or may not be still allocated to the | ||
576 | * file. We can't tell the filesystem to free it because we can't | ||
577 | * take i_mutex here. In the worst case, the file still has blocks | ||
578 | * allocated past the end of the file. | ||
579 | */ | ||
580 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
581 | if (unlikely(vmf->pgoff >= size)) { | ||
582 | error = -EIO; | ||
583 | goto out; | ||
584 | } | ||
585 | |||
586 | if (dax_map_atomic(bdev, &dax) < 0) { | 515 | if (dax_map_atomic(bdev, &dax) < 0) { |
587 | error = PTR_ERR(dax.addr); | 516 | error = PTR_ERR(dax.addr); |
588 | goto out; | 517 | goto out; |
589 | } | 518 | } |
590 | |||
591 | if (buffer_unwritten(bh) || buffer_new(bh)) { | ||
592 | clear_pmem(dax.addr, PAGE_SIZE); | ||
593 | wmb_pmem(); | ||
594 | } | ||
595 | dax_unmap_atomic(bdev, &dax); | 519 | dax_unmap_atomic(bdev, &dax); |
596 | 520 | ||
597 | error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false, | 521 | error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false, |
@@ -612,19 +536,13 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, | |||
612 | * @vma: The virtual memory area where the fault occurred | 536 | * @vma: The virtual memory area where the fault occurred |
613 | * @vmf: The description of the fault | 537 | * @vmf: The description of the fault |
614 | * @get_block: The filesystem method used to translate file offsets to blocks | 538 | * @get_block: The filesystem method used to translate file offsets to blocks |
615 | * @complete_unwritten: The filesystem method used to convert unwritten blocks | ||
616 | * to written so the data written to them is exposed. This is required for | ||
617 | * required by write faults for filesystems that will return unwritten | ||
618 | * extent mappings from @get_block, but it is optional for reads as | ||
619 | * dax_insert_mapping() will always zero unwritten blocks. If the fs does | ||
620 | * not support unwritten extents, the it should pass NULL. | ||
621 | * | 539 | * |
622 | * When a page fault occurs, filesystems may call this helper in their | 540 | * When a page fault occurs, filesystems may call this helper in their |
623 | * fault handler for DAX files. __dax_fault() assumes the caller has done all | 541 | * fault handler for DAX files. __dax_fault() assumes the caller has done all |
624 | * the necessary locking for the page fault to proceed successfully. | 542 | * the necessary locking for the page fault to proceed successfully. |
625 | */ | 543 | */ |
626 | int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | 544 | int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, |
627 | get_block_t get_block, dax_iodone_t complete_unwritten) | 545 | get_block_t get_block) |
628 | { | 546 | { |
629 | struct file *file = vma->vm_file; | 547 | struct file *file = vma->vm_file; |
630 | struct address_space *mapping = file->f_mapping; | 548 | struct address_space *mapping = file->f_mapping; |
@@ -659,15 +577,6 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
659 | put_page(page); | 577 | put_page(page); |
660 | goto repeat; | 578 | goto repeat; |
661 | } | 579 | } |
662 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
663 | if (unlikely(vmf->pgoff >= size)) { | ||
664 | /* | ||
665 | * We have a struct page covering a hole in the file | ||
666 | * from a read fault and we've raced with a truncate | ||
667 | */ | ||
668 | error = -EIO; | ||
669 | goto unlock_page; | ||
670 | } | ||
671 | } | 580 | } |
672 | 581 | ||
673 | error = get_block(inode, block, &bh, 0); | 582 | error = get_block(inode, block, &bh, 0); |
@@ -700,17 +609,8 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
700 | if (error) | 609 | if (error) |
701 | goto unlock_page; | 610 | goto unlock_page; |
702 | vmf->page = page; | 611 | vmf->page = page; |
703 | if (!page) { | 612 | if (!page) |
704 | i_mmap_lock_read(mapping); | 613 | i_mmap_lock_read(mapping); |
705 | /* Check we didn't race with truncate */ | ||
706 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> | ||
707 | PAGE_SHIFT; | ||
708 | if (vmf->pgoff >= size) { | ||
709 | i_mmap_unlock_read(mapping); | ||
710 | error = -EIO; | ||
711 | goto out; | ||
712 | } | ||
713 | } | ||
714 | return VM_FAULT_LOCKED; | 614 | return VM_FAULT_LOCKED; |
715 | } | 615 | } |
716 | 616 | ||
@@ -727,23 +627,9 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
727 | page = NULL; | 627 | page = NULL; |
728 | } | 628 | } |
729 | 629 | ||
730 | /* | 630 | /* Filesystem should not return unwritten buffers to us! */ |
731 | * If we successfully insert the new mapping over an unwritten extent, | 631 | WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh)); |
732 | * we need to ensure we convert the unwritten extent. If there is an | ||
733 | * error inserting the mapping, the filesystem needs to leave it as | ||
734 | * unwritten to prevent exposure of the stale underlying data to | ||
735 | * userspace, but we still need to call the completion function so | ||
736 | * the private resources on the mapping buffer can be released. We | ||
737 | * indicate what the callback should do via the uptodate variable, same | ||
738 | * as for normal BH based IO completions. | ||
739 | */ | ||
740 | error = dax_insert_mapping(inode, &bh, vma, vmf); | 632 | error = dax_insert_mapping(inode, &bh, vma, vmf); |
741 | if (buffer_unwritten(&bh)) { | ||
742 | if (complete_unwritten) | ||
743 | complete_unwritten(&bh, !error); | ||
744 | else | ||
745 | WARN_ON_ONCE(!(vmf->flags & FAULT_FLAG_WRITE)); | ||
746 | } | ||
747 | 633 | ||
748 | out: | 634 | out: |
749 | if (error == -ENOMEM) | 635 | if (error == -ENOMEM) |
@@ -772,7 +658,7 @@ EXPORT_SYMBOL(__dax_fault); | |||
772 | * fault handler for DAX files. | 658 | * fault handler for DAX files. |
773 | */ | 659 | */ |
774 | int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | 660 | int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, |
775 | get_block_t get_block, dax_iodone_t complete_unwritten) | 661 | get_block_t get_block) |
776 | { | 662 | { |
777 | int result; | 663 | int result; |
778 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; | 664 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; |
@@ -781,7 +667,7 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
781 | sb_start_pagefault(sb); | 667 | sb_start_pagefault(sb); |
782 | file_update_time(vma->vm_file); | 668 | file_update_time(vma->vm_file); |
783 | } | 669 | } |
784 | result = __dax_fault(vma, vmf, get_block, complete_unwritten); | 670 | result = __dax_fault(vma, vmf, get_block); |
785 | if (vmf->flags & FAULT_FLAG_WRITE) | 671 | if (vmf->flags & FAULT_FLAG_WRITE) |
786 | sb_end_pagefault(sb); | 672 | sb_end_pagefault(sb); |
787 | 673 | ||
@@ -815,8 +701,7 @@ static void __dax_dbg(struct buffer_head *bh, unsigned long address, | |||
815 | #define dax_pmd_dbg(bh, address, reason) __dax_dbg(bh, address, reason, "dax_pmd") | 701 | #define dax_pmd_dbg(bh, address, reason) __dax_dbg(bh, address, reason, "dax_pmd") |
816 | 702 | ||
817 | int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | 703 | int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, |
818 | pmd_t *pmd, unsigned int flags, get_block_t get_block, | 704 | pmd_t *pmd, unsigned int flags, get_block_t get_block) |
819 | dax_iodone_t complete_unwritten) | ||
820 | { | 705 | { |
821 | struct file *file = vma->vm_file; | 706 | struct file *file = vma->vm_file; |
822 | struct address_space *mapping = file->f_mapping; | 707 | struct address_space *mapping = file->f_mapping; |
@@ -875,6 +760,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
875 | if (get_block(inode, block, &bh, 1) != 0) | 760 | if (get_block(inode, block, &bh, 1) != 0) |
876 | return VM_FAULT_SIGBUS; | 761 | return VM_FAULT_SIGBUS; |
877 | alloc = true; | 762 | alloc = true; |
763 | WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh)); | ||
878 | } | 764 | } |
879 | 765 | ||
880 | bdev = bh.b_bdev; | 766 | bdev = bh.b_bdev; |
@@ -902,23 +788,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
902 | 788 | ||
903 | i_mmap_lock_read(mapping); | 789 | i_mmap_lock_read(mapping); |
904 | 790 | ||
905 | /* | ||
906 | * If a truncate happened while we were allocating blocks, we may | ||
907 | * leave blocks allocated to the file that are beyond EOF. We can't | ||
908 | * take i_mutex here, so just leave them hanging; they'll be freed | ||
909 | * when the file is deleted. | ||
910 | */ | ||
911 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
912 | if (pgoff >= size) { | ||
913 | result = VM_FAULT_SIGBUS; | ||
914 | goto out; | ||
915 | } | ||
916 | if ((pgoff | PG_PMD_COLOUR) >= size) { | ||
917 | dax_pmd_dbg(&bh, address, | ||
918 | "offset + huge page size > file size"); | ||
919 | goto fallback; | ||
920 | } | ||
921 | |||
922 | if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) { | 791 | if (!write && !buffer_mapped(&bh) && buffer_uptodate(&bh)) { |
923 | spinlock_t *ptl; | 792 | spinlock_t *ptl; |
924 | pmd_t entry; | 793 | pmd_t entry; |
@@ -954,8 +823,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
954 | long length = dax_map_atomic(bdev, &dax); | 823 | long length = dax_map_atomic(bdev, &dax); |
955 | 824 | ||
956 | if (length < 0) { | 825 | if (length < 0) { |
957 | result = VM_FAULT_SIGBUS; | 826 | dax_pmd_dbg(&bh, address, "dax-error fallback"); |
958 | goto out; | 827 | goto fallback; |
959 | } | 828 | } |
960 | if (length < PMD_SIZE) { | 829 | if (length < PMD_SIZE) { |
961 | dax_pmd_dbg(&bh, address, "dax-length too small"); | 830 | dax_pmd_dbg(&bh, address, "dax-length too small"); |
@@ -973,14 +842,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
973 | dax_pmd_dbg(&bh, address, "pfn not in memmap"); | 842 | dax_pmd_dbg(&bh, address, "pfn not in memmap"); |
974 | goto fallback; | 843 | goto fallback; |
975 | } | 844 | } |
976 | |||
977 | if (buffer_unwritten(&bh) || buffer_new(&bh)) { | ||
978 | clear_pmem(dax.addr, PMD_SIZE); | ||
979 | wmb_pmem(); | ||
980 | count_vm_event(PGMAJFAULT); | ||
981 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); | ||
982 | result |= VM_FAULT_MAJOR; | ||
983 | } | ||
984 | dax_unmap_atomic(bdev, &dax); | 845 | dax_unmap_atomic(bdev, &dax); |
985 | 846 | ||
986 | /* | 847 | /* |
@@ -1020,9 +881,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
1020 | out: | 881 | out: |
1021 | i_mmap_unlock_read(mapping); | 882 | i_mmap_unlock_read(mapping); |
1022 | 883 | ||
1023 | if (buffer_unwritten(&bh)) | ||
1024 | complete_unwritten(&bh, !(result & VM_FAULT_ERROR)); | ||
1025 | |||
1026 | return result; | 884 | return result; |
1027 | 885 | ||
1028 | fallback: | 886 | fallback: |
@@ -1042,8 +900,7 @@ EXPORT_SYMBOL_GPL(__dax_pmd_fault); | |||
1042 | * pmd_fault handler for DAX files. | 900 | * pmd_fault handler for DAX files. |
1043 | */ | 901 | */ |
1044 | int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | 902 | int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, |
1045 | pmd_t *pmd, unsigned int flags, get_block_t get_block, | 903 | pmd_t *pmd, unsigned int flags, get_block_t get_block) |
1046 | dax_iodone_t complete_unwritten) | ||
1047 | { | 904 | { |
1048 | int result; | 905 | int result; |
1049 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; | 906 | struct super_block *sb = file_inode(vma->vm_file)->i_sb; |
@@ -1052,8 +909,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, | |||
1052 | sb_start_pagefault(sb); | 909 | sb_start_pagefault(sb); |
1053 | file_update_time(vma->vm_file); | 910 | file_update_time(vma->vm_file); |
1054 | } | 911 | } |
1055 | result = __dax_pmd_fault(vma, address, pmd, flags, get_block, | 912 | result = __dax_pmd_fault(vma, address, pmd, flags, get_block); |
1056 | complete_unwritten); | ||
1057 | if (flags & FAULT_FLAG_WRITE) | 913 | if (flags & FAULT_FLAG_WRITE) |
1058 | sb_end_pagefault(sb); | 914 | sb_end_pagefault(sb); |
1059 | 915 | ||
@@ -1091,6 +947,43 @@ int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1091 | } | 947 | } |
1092 | EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); | 948 | EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); |
1093 | 949 | ||
950 | static bool dax_range_is_aligned(struct block_device *bdev, | ||
951 | unsigned int offset, unsigned int length) | ||
952 | { | ||
953 | unsigned short sector_size = bdev_logical_block_size(bdev); | ||
954 | |||
955 | if (!IS_ALIGNED(offset, sector_size)) | ||
956 | return false; | ||
957 | if (!IS_ALIGNED(length, sector_size)) | ||
958 | return false; | ||
959 | |||
960 | return true; | ||
961 | } | ||
962 | |||
963 | int __dax_zero_page_range(struct block_device *bdev, sector_t sector, | ||
964 | unsigned int offset, unsigned int length) | ||
965 | { | ||
966 | struct blk_dax_ctl dax = { | ||
967 | .sector = sector, | ||
968 | .size = PAGE_SIZE, | ||
969 | }; | ||
970 | |||
971 | if (dax_range_is_aligned(bdev, offset, length)) { | ||
972 | sector_t start_sector = dax.sector + (offset >> 9); | ||
973 | |||
974 | return blkdev_issue_zeroout(bdev, start_sector, | ||
975 | length >> 9, GFP_NOFS, true); | ||
976 | } else { | ||
977 | if (dax_map_atomic(bdev, &dax) < 0) | ||
978 | return PTR_ERR(dax.addr); | ||
979 | clear_pmem(dax.addr + offset, length); | ||
980 | wmb_pmem(); | ||
981 | dax_unmap_atomic(bdev, &dax); | ||
982 | } | ||
983 | return 0; | ||
984 | } | ||
985 | EXPORT_SYMBOL_GPL(__dax_zero_page_range); | ||
986 | |||
1094 | /** | 987 | /** |
1095 | * dax_zero_page_range - zero a range within a page of a DAX file | 988 | * dax_zero_page_range - zero a range within a page of a DAX file |
1096 | * @inode: The file being truncated | 989 | * @inode: The file being truncated |
@@ -1102,12 +995,6 @@ EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); | |||
1102 | * page in a DAX file. This is intended for hole-punch operations. If | 995 | * page in a DAX file. This is intended for hole-punch operations. If |
1103 | * you are truncating a file, the helper function dax_truncate_page() may be | 996 | * you are truncating a file, the helper function dax_truncate_page() may be |
1104 | * more convenient. | 997 | * more convenient. |
1105 | * | ||
1106 | * We work in terms of PAGE_SIZE here for commonality with | ||
1107 | * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem | ||
1108 | * took care of disposing of the unnecessary blocks. Even if the filesystem | ||
1109 | * block size is smaller than PAGE_SIZE, we have to zero the rest of the page | ||
1110 | * since the file might be mmapped. | ||
1111 | */ | 998 | */ |
1112 | int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, | 999 | int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, |
1113 | get_block_t get_block) | 1000 | get_block_t get_block) |
@@ -1126,23 +1013,11 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, | |||
1126 | bh.b_bdev = inode->i_sb->s_bdev; | 1013 | bh.b_bdev = inode->i_sb->s_bdev; |
1127 | bh.b_size = PAGE_SIZE; | 1014 | bh.b_size = PAGE_SIZE; |
1128 | err = get_block(inode, index, &bh, 0); | 1015 | err = get_block(inode, index, &bh, 0); |
1129 | if (err < 0) | 1016 | if (err < 0 || !buffer_written(&bh)) |
1130 | return err; | 1017 | return err; |
1131 | if (buffer_written(&bh)) { | ||
1132 | struct block_device *bdev = bh.b_bdev; | ||
1133 | struct blk_dax_ctl dax = { | ||
1134 | .sector = to_sector(&bh, inode), | ||
1135 | .size = PAGE_SIZE, | ||
1136 | }; | ||
1137 | |||
1138 | if (dax_map_atomic(bdev, &dax) < 0) | ||
1139 | return PTR_ERR(dax.addr); | ||
1140 | clear_pmem(dax.addr + offset, length); | ||
1141 | wmb_pmem(); | ||
1142 | dax_unmap_atomic(bdev, &dax); | ||
1143 | } | ||
1144 | 1018 | ||
1145 | return 0; | 1019 | return __dax_zero_page_range(bh.b_bdev, to_sector(&bh, inode), |
1020 | offset, length); | ||
1146 | } | 1021 | } |
1147 | EXPORT_SYMBOL_GPL(dax_zero_page_range); | 1022 | EXPORT_SYMBOL_GPL(dax_zero_page_range); |
1148 | 1023 | ||
@@ -1154,12 +1029,6 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range); | |||
1154 | * | 1029 | * |
1155 | * Similar to block_truncate_page(), this function can be called by a | 1030 | * Similar to block_truncate_page(), this function can be called by a |
1156 | * filesystem when it is truncating a DAX file to handle the partial page. | 1031 | * filesystem when it is truncating a DAX file to handle the partial page. |
1157 | * | ||
1158 | * We work in terms of PAGE_SIZE here for commonality with | ||
1159 | * block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem | ||
1160 | * took care of disposing of the unnecessary blocks. Even if the filesystem | ||
1161 | * block size is smaller than PAGE_SIZE, we have to zero the rest of the page | ||
1162 | * since the file might be mmapped. | ||
1163 | */ | 1032 | */ |
1164 | int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block) | 1033 | int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block) |
1165 | { | 1034 | { |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index c1400b109805..868c02317b05 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
@@ -51,7 +51,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
51 | } | 51 | } |
52 | down_read(&ei->dax_sem); | 52 | down_read(&ei->dax_sem); |
53 | 53 | ||
54 | ret = __dax_fault(vma, vmf, ext2_get_block, NULL); | 54 | ret = __dax_fault(vma, vmf, ext2_get_block); |
55 | 55 | ||
56 | up_read(&ei->dax_sem); | 56 | up_read(&ei->dax_sem); |
57 | if (vmf->flags & FAULT_FLAG_WRITE) | 57 | if (vmf->flags & FAULT_FLAG_WRITE) |
@@ -72,7 +72,7 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
72 | } | 72 | } |
73 | down_read(&ei->dax_sem); | 73 | down_read(&ei->dax_sem); |
74 | 74 | ||
75 | ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block, NULL); | 75 | ret = __dax_pmd_fault(vma, addr, pmd, flags, ext2_get_block); |
76 | 76 | ||
77 | up_read(&ei->dax_sem); | 77 | up_read(&ei->dax_sem); |
78 | if (flags & FAULT_FLAG_WRITE) | 78 | if (flags & FAULT_FLAG_WRITE) |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b675610391b8..fcbe58641e40 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/highuid.h> | 26 | #include <linux/highuid.h> |
27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
28 | #include <linux/dax.h> | 28 | #include <linux/dax.h> |
29 | #include <linux/blkdev.h> | ||
29 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
30 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
31 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
@@ -737,19 +738,18 @@ static int ext2_get_blocks(struct inode *inode, | |||
737 | * so that it's not found by another thread before it's | 738 | * so that it's not found by another thread before it's |
738 | * initialised | 739 | * initialised |
739 | */ | 740 | */ |
740 | err = dax_clear_sectors(inode->i_sb->s_bdev, | 741 | err = sb_issue_zeroout(inode->i_sb, |
741 | le32_to_cpu(chain[depth-1].key) << | 742 | le32_to_cpu(chain[depth-1].key), count, |
742 | (inode->i_blkbits - 9), | 743 | GFP_NOFS); |
743 | 1 << inode->i_blkbits); | ||
744 | if (err) { | 744 | if (err) { |
745 | mutex_unlock(&ei->truncate_mutex); | 745 | mutex_unlock(&ei->truncate_mutex); |
746 | goto cleanup; | 746 | goto cleanup; |
747 | } | 747 | } |
748 | } | 748 | } else |
749 | set_buffer_new(bh_result); | ||
749 | 750 | ||
750 | ext2_splice_branch(inode, iblock, partial, indirect_blks, count); | 751 | ext2_splice_branch(inode, iblock, partial, indirect_blks, count); |
751 | mutex_unlock(&ei->truncate_mutex); | 752 | mutex_unlock(&ei->truncate_mutex); |
752 | set_buffer_new(bh_result); | ||
753 | got_it: | 753 | got_it: |
754 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); | 754 | map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); |
755 | if (count > blocks_to_boundary) | 755 | if (count > blocks_to_boundary) |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b78caf25f746..1d9379568aa8 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -922,16 +922,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
922 | blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); | 922 | blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); |
923 | 923 | ||
924 | if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { | 924 | if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { |
925 | if (blocksize != PAGE_SIZE) { | 925 | err = bdev_dax_supported(sb, blocksize); |
926 | ext2_msg(sb, KERN_ERR, | 926 | if (err) |
927 | "error: unsupported blocksize for dax"); | ||
928 | goto failed_mount; | 927 | goto failed_mount; |
929 | } | ||
930 | if (!sb->s_bdev->bd_disk->fops->direct_access) { | ||
931 | ext2_msg(sb, KERN_ERR, | ||
932 | "error: device does not support dax"); | ||
933 | goto failed_mount; | ||
934 | } | ||
935 | } | 928 | } |
936 | 929 | ||
937 | /* If the blocksize doesn't match, re-read the thing.. */ | 930 | /* If the blocksize doesn't match, re-read the thing.. */ |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d478110c32a6..df44c877892a 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -202,7 +202,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
202 | if (IS_ERR(handle)) | 202 | if (IS_ERR(handle)) |
203 | result = VM_FAULT_SIGBUS; | 203 | result = VM_FAULT_SIGBUS; |
204 | else | 204 | else |
205 | result = __dax_fault(vma, vmf, ext4_dax_get_block, NULL); | 205 | result = __dax_fault(vma, vmf, ext4_dax_get_block); |
206 | 206 | ||
207 | if (write) { | 207 | if (write) { |
208 | if (!IS_ERR(handle)) | 208 | if (!IS_ERR(handle)) |
@@ -238,7 +238,7 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | |||
238 | result = VM_FAULT_SIGBUS; | 238 | result = VM_FAULT_SIGBUS; |
239 | else | 239 | else |
240 | result = __dax_pmd_fault(vma, addr, pmd, flags, | 240 | result = __dax_pmd_fault(vma, addr, pmd, flags, |
241 | ext4_dax_get_block, NULL); | 241 | ext4_dax_get_block); |
242 | 242 | ||
243 | if (write) { | 243 | if (write) { |
244 | if (!IS_ERR(handle)) | 244 | if (!IS_ERR(handle)) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 20c5d52253b4..3822a5aedc61 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -3417,16 +3417,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3417 | } | 3417 | } |
3418 | 3418 | ||
3419 | if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { | 3419 | if (sbi->s_mount_opt & EXT4_MOUNT_DAX) { |
3420 | if (blocksize != PAGE_SIZE) { | 3420 | err = bdev_dax_supported(sb, blocksize); |
3421 | ext4_msg(sb, KERN_ERR, | 3421 | if (err) |
3422 | "error: unsupported blocksize for dax"); | ||
3423 | goto failed_mount; | ||
3424 | } | ||
3425 | if (!sb->s_bdev->bd_disk->fops->direct_access) { | ||
3426 | ext4_msg(sb, KERN_ERR, | ||
3427 | "error: device does not support dax"); | ||
3428 | goto failed_mount; | 3422 | goto failed_mount; |
3429 | } | ||
3430 | } | 3423 | } |
3431 | 3424 | ||
3432 | if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { | 3425 | if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 613ea2d7ac19..586bb64e674b 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -72,18 +72,11 @@ xfs_zero_extent( | |||
72 | struct xfs_mount *mp = ip->i_mount; | 72 | struct xfs_mount *mp = ip->i_mount; |
73 | xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); | 73 | xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb); |
74 | sector_t block = XFS_BB_TO_FSBT(mp, sector); | 74 | sector_t block = XFS_BB_TO_FSBT(mp, sector); |
75 | ssize_t size = XFS_FSB_TO_B(mp, count_fsb); | ||
76 | |||
77 | if (IS_DAX(VFS_I(ip))) | ||
78 | return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)), | ||
79 | sector, size); | ||
80 | |||
81 | /* | ||
82 | * let the block layer decide on the fastest method of | ||
83 | * implementing the zeroing. | ||
84 | */ | ||
85 | return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS); | ||
86 | 75 | ||
76 | return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)), | ||
77 | block << (mp->m_super->s_blocksize_bits - 9), | ||
78 | count_fsb << (mp->m_super->s_blocksize_bits - 9), | ||
79 | GFP_NOFS, true); | ||
87 | } | 80 | } |
88 | 81 | ||
89 | /* | 82 | /* |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 44af22897c8b..47fc63295422 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -1551,7 +1551,7 @@ xfs_filemap_page_mkwrite( | |||
1551 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | 1551 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
1552 | 1552 | ||
1553 | if (IS_DAX(inode)) { | 1553 | if (IS_DAX(inode)) { |
1554 | ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL); | 1554 | ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault); |
1555 | } else { | 1555 | } else { |
1556 | ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); | 1556 | ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); |
1557 | ret = block_page_mkwrite_return(ret); | 1557 | ret = block_page_mkwrite_return(ret); |
@@ -1585,7 +1585,7 @@ xfs_filemap_fault( | |||
1585 | * changes to xfs_get_blocks_direct() to map unwritten extent | 1585 | * changes to xfs_get_blocks_direct() to map unwritten extent |
1586 | * ioend for conversion on read-only mappings. | 1586 | * ioend for conversion on read-only mappings. |
1587 | */ | 1587 | */ |
1588 | ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL); | 1588 | ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault); |
1589 | } else | 1589 | } else |
1590 | ret = filemap_fault(vma, vmf); | 1590 | ret = filemap_fault(vma, vmf); |
1591 | xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | 1591 | xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
@@ -1622,8 +1622,7 @@ xfs_filemap_pmd_fault( | |||
1622 | } | 1622 | } |
1623 | 1623 | ||
1624 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | 1624 | xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
1625 | ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault, | 1625 | ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault); |
1626 | NULL); | ||
1627 | xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); | 1626 | xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); |
1628 | 1627 | ||
1629 | if (flags & FAULT_FLAG_WRITE) | 1628 | if (flags & FAULT_FLAG_WRITE) |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 416421d7ff10..11ea5d51db56 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1555,14 +1555,12 @@ xfs_fs_fill_super( | |||
1555 | 1555 | ||
1556 | if (mp->m_flags & XFS_MOUNT_DAX) { | 1556 | if (mp->m_flags & XFS_MOUNT_DAX) { |
1557 | xfs_warn(mp, | 1557 | xfs_warn(mp, |
1558 | "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); | 1558 | "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); |
1559 | if (sb->s_blocksize != PAGE_SIZE) { | 1559 | |
1560 | xfs_alert(mp, | 1560 | error = bdev_dax_supported(sb, sb->s_blocksize); |
1561 | "Filesystem block size invalid for DAX Turning DAX off."); | 1561 | if (error) { |
1562 | mp->m_flags &= ~XFS_MOUNT_DAX; | ||
1563 | } else if (!sb->s_bdev->bd_disk->fops->direct_access) { | ||
1564 | xfs_alert(mp, | 1562 | xfs_alert(mp, |
1565 | "Block device does not support DAX Turning DAX off."); | 1563 | "DAX unsupported by block device. Turning off DAX."); |
1566 | mp->m_flags &= ~XFS_MOUNT_DAX; | 1564 | mp->m_flags &= ~XFS_MOUNT_DAX; |
1567 | } | 1565 | } |
1568 | } | 1566 | } |