author     Dave Chinner <david@fromorbit.com>	2016-10-02 18:53:59 -0400
committer  Dave Chinner <david@fromorbit.com>	2016-10-02 18:53:59 -0400
commit     a1f45e668e14c26b4700b1936c5a41b58cc4ac74
tree       94c1d2b34c15fff8ff39baf7357673978b5a3b2f
parent     a89b3f97bb7c248aea155a90f31d3dfb93b75971
parent     d5bfccdf38d094f2b15fae8b361d7bd47f2509d6

Merge branch 'iomap-4.9-dax' into for-next
Diffstat:

 fs/dax.c              | 252
 fs/ext2/Kconfig       |   1
 fs/ext2/ext2.h        |   1
 fs/ext2/file.c        |  76
 fs/ext2/inode.c       | 100
 fs/internal.h         |  11
 fs/iomap.c            |   5
 fs/xfs/xfs_aops.c     |  31
 fs/xfs/xfs_aops.h     |   1
 fs/xfs/xfs_file.c     |  79
 fs/xfs/xfs_iomap.c    |  22
 include/linux/dax.h   |   6
 include/linux/iomap.h |   1

 13 files changed, 464 insertions(+), 122 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -31,6 +31,8 @@
 #include <linux/vmstat.h>
 #include <linux/pfn_t.h>
 #include <linux/sizes.h>
+#include <linux/iomap.h>
+#include "internal.h"
 
 /*
  * We use lowest available bit in exceptional entry for locking, other two
@@ -580,14 +582,13 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	return VM_FAULT_LOCKED;
 }
 
-static int copy_user_bh(struct page *to, struct inode *inode,
-		struct buffer_head *bh, unsigned long vaddr)
+static int copy_user_dax(struct block_device *bdev, sector_t sector, size_t size,
+		struct page *to, unsigned long vaddr)
 {
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, inode),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
-	struct block_device *bdev = bh->b_bdev;
 	void *vto;
 
 	if (dax_map_atomic(bdev, &dax) < 0)
@@ -790,14 +791,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
 static int dax_insert_mapping(struct address_space *mapping,
-		struct buffer_head *bh, void **entryp,
-		struct vm_area_struct *vma, struct vm_fault *vmf)
+		struct block_device *bdev, sector_t sector, size_t size,
+		void **entryp, struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	unsigned long vaddr = (unsigned long)vmf->virtual_address;
-	struct block_device *bdev = bh->b_bdev;
 	struct blk_dax_ctl dax = {
-		.sector = to_sector(bh, mapping->host),
-		.size = bh->b_size,
+		.sector = sector,
+		.size = size,
 	};
 	void *ret;
 	void *entry = *entryp;
@@ -868,7 +868,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 	if (vmf->cow_page) {
 		struct page *new_page = vmf->cow_page;
 		if (buffer_written(&bh))
-			error = copy_user_bh(new_page, inode, &bh, vaddr);
+			error = copy_user_dax(bh.b_bdev, to_sector(&bh, inode),
+					bh.b_size, new_page, vaddr);
 		else
 			clear_user_highpage(new_page, vaddr);
 		if (error)
@@ -898,7 +899,8 @@ int dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
 
 	/* Filesystem should not return unwritten buffers to us! */
 	WARN_ON_ONCE(buffer_unwritten(&bh) || buffer_new(&bh));
-	error = dax_insert_mapping(mapping, &bh, &entry, vma, vmf);
+	error = dax_insert_mapping(mapping, bh.b_bdev, to_sector(&bh, inode),
+			bh.b_size, &entry, vma, vmf);
 unlock_entry:
 	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
 out:
@@ -1241,3 +1243,229 @@ int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
 	return dax_zero_page_range(inode, from, length, get_block);
 }
 EXPORT_SYMBOL_GPL(dax_truncate_page);
+
+#ifdef CONFIG_FS_IOMAP
+static loff_t
+iomap_dax_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
+		struct iomap *iomap)
+{
+	struct iov_iter *iter = data;
+	loff_t end = pos + length, done = 0;
+	ssize_t ret = 0;
+
+	if (iov_iter_rw(iter) == READ) {
+		end = min(end, i_size_read(inode));
+		if (pos >= end)
+			return 0;
+
+		if (iomap->type == IOMAP_HOLE || iomap->type == IOMAP_UNWRITTEN)
+			return iov_iter_zero(min(length, end - pos), iter);
+	}
+
+	if (WARN_ON_ONCE(iomap->type != IOMAP_MAPPED))
+		return -EIO;
+
+	while (pos < end) {
+		unsigned offset = pos & (PAGE_SIZE - 1);
+		struct blk_dax_ctl dax = { 0 };
+		ssize_t map_len;
+
+		dax.sector = iomap->blkno +
+			(((pos & PAGE_MASK) - iomap->offset) >> 9);
+		dax.size = (length + offset + PAGE_SIZE - 1) & PAGE_MASK;
+		map_len = dax_map_atomic(iomap->bdev, &dax);
+		if (map_len < 0) {
+			ret = map_len;
+			break;
+		}
+
+		dax.addr += offset;
+		map_len -= offset;
+		if (map_len > end - pos)
+			map_len = end - pos;
+
+		if (iov_iter_rw(iter) == WRITE)
+			map_len = copy_from_iter_pmem(dax.addr, map_len, iter);
+		else
+			map_len = copy_to_iter(dax.addr, map_len, iter);
+		dax_unmap_atomic(iomap->bdev, &dax);
+		if (map_len <= 0) {
+			ret = map_len ? map_len : -EFAULT;
+			break;
+		}
+
+		pos += map_len;
+		length -= map_len;
+		done += map_len;
+	}
+
+	return done ? done : ret;
+}
+
+/**
+ * iomap_dax_rw - Perform I/O to a DAX file
+ * @iocb:	The control block for this I/O
+ * @iter:	The addresses to do I/O from or to
+ * @ops:	iomap ops passed from the file system
+ *
+ * This function performs read and write operations to directly mapped
+ * persistent memory.  The caller needs to take care of read/write exclusion
+ * and evicting any page cache pages in the region under I/O.
+ */
+ssize_t
+iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops)
+{
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = mapping->host;
+	loff_t pos = iocb->ki_pos, ret = 0, done = 0;
+	unsigned flags = 0;
+
+	if (iov_iter_rw(iter) == WRITE)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them:  A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole.  It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
+	 *
+	 * XXX: This is racy against mmap, and there's nothing we can do about
+	 * it. We'll eventually need to shift this down even further so that
+	 * we can check if we allocated blocks over a hole first.
+	 */
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2_range(mapping,
+				pos >> PAGE_SHIFT,
+				(pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(ret);
+	}
+
+	while (iov_iter_count(iter)) {
+		ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops,
+				iter, iomap_dax_actor);
+		if (ret <= 0)
+			break;
+		pos += ret;
+		done += ret;
+	}
+
+	iocb->ki_pos += done;
+	return done ? done : ret;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_rw);
+
+/**
+ * iomap_dax_fault - handle a page fault on a DAX file
+ * @vma:	The virtual memory area where the fault occurred
+ * @vmf:	The description of the fault
+ * @ops:	iomap ops passed from the file system
+ *
+ * When a page fault occurs, filesystems may call this helper in their fault
+ * or mkwrite handler for DAX files.  Assumes the caller has done all the
+ * necessary locking for the page fault to proceed successfully.
+ */
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+		struct iomap_ops *ops)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct inode *inode = mapping->host;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
+	sector_t sector;
+	struct iomap iomap = { 0 };
+	unsigned flags = 0;
+	int error, major = 0;
+	void *entry;
+
+	/*
+	 * Check whether offset isn't beyond end of file now. Caller is supposed
+	 * to hold locks serializing us with truncate / punch hole so this is
+	 * a reliable test.
+	 */
+	if (pos >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	entry = grab_mapping_entry(mapping, vmf->pgoff);
+	if (IS_ERR(entry)) {
+		error = PTR_ERR(entry);
+		goto out;
+	}
+
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		flags |= IOMAP_WRITE;
+
+	/*
+	 * Note that we don't bother to use iomap_apply here: DAX requires
+	 * the file system block size to be equal to the page size, which means
+	 * that we never have to deal with more than a single extent here.
+	 */
+	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
+	if (error)
+		goto unlock_entry;
+	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
+		error = -EIO;	/* fs corruption? */
+		goto unlock_entry;
+	}
+
+	sector = iomap.blkno + (((pos & PAGE_MASK) - iomap.offset) >> 9);
+
+	if (vmf->cow_page) {
+		switch (iomap.type) {
+		case IOMAP_HOLE:
+		case IOMAP_UNWRITTEN:
+			clear_user_highpage(vmf->cow_page, vaddr);
+			break;
+		case IOMAP_MAPPED:
+			error = copy_user_dax(iomap.bdev, sector, PAGE_SIZE,
+					vmf->cow_page, vaddr);
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		}
+
+		if (error)
+			goto unlock_entry;
+		if (!radix_tree_exceptional_entry(entry)) {
+			vmf->page = entry;
+			return VM_FAULT_LOCKED;
+		}
+		vmf->entry = entry;
+		return VM_FAULT_DAX_LOCKED;
+	}
+
+	switch (iomap.type) {
+	case IOMAP_MAPPED:
+		if (iomap.flags & IOMAP_F_NEW) {
+			count_vm_event(PGMAJFAULT);
+			mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
+			major = VM_FAULT_MAJOR;
+		}
+		error = dax_insert_mapping(mapping, iomap.bdev, sector,
+				PAGE_SIZE, &entry, vma, vmf);
+		break;
+	case IOMAP_UNWRITTEN:
+	case IOMAP_HOLE:
+		if (!(vmf->flags & FAULT_FLAG_WRITE))
+			return dax_load_hole(mapping, entry, vmf);
+		/*FALLTHRU*/
+	default:
+		WARN_ON_ONCE(1);
+		error = -EIO;
+		break;
+	}
+
+unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
+	if (error == -ENOMEM)
+		return VM_FAULT_OOM | major;
+	/* -EBUSY is fine, somebody else faulted on the same PTE */
+	if (error < 0 && error != -EBUSY)
+		return VM_FAULT_SIGBUS | major;
+	return VM_FAULT_NOPAGE | major;
+}
+EXPORT_SYMBOL_GPL(iomap_dax_fault);
+#endif /* CONFIG_FS_IOMAP */
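
The actor's sector computation above page-aligns the I/O position, makes it relative to the extent's starting file offset, and shifts right by 9 to convert bytes into 512-byte sectors. A standalone sketch of that arithmetic (ordinary userspace C; every number is invented for illustration, not taken from the patch):

/*
 * Sketch of the sector math in iomap_dax_actor; all values hypothetical.
 * blkno is the extent's first 512-byte sector, ext_offset the file offset
 * at which the extent starts.
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

int main(void)
{
	uint64_t blkno = 8192;		/* extent starts at sector 8192 */
	uint64_t ext_offset = 0x200000;	/* extent starts at file offset 2 MiB */
	uint64_t pos = 0x201234;	/* current I/O position */

	uint64_t sector = blkno + (((pos & PAGE_MASK) - ext_offset) >> 9);

	/* 0x201234 aligns down to 0x201000; 0x1000 bytes = 8 sectors -> 8200 */
	printf("sector = %llu\n", (unsigned long long)sector);
	return 0;
}
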
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig
index c634874e12d9..36bea5adcaba 100644
--- a/fs/ext2/Kconfig
+++ b/fs/ext2/Kconfig
@@ -1,5 +1,6 @@
 config EXT2_FS
 	tristate "Second extended fs support"
+	select FS_IOMAP if FS_DAX
 	help
 	  Ext2 is a standard Linux file system for hard disks.
 
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 06af2f92226c..37e2be784ac7 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct file_operations ext2_file_operations;
 /* inode.c */
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
+extern struct iomap_ops ext2_iomap_ops;
 
 /* namei.c */
 extern const struct inode_operations ext2_dir_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 5efeefe17abb..423cc01c9d41 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -22,11 +22,59 @@
 #include <linux/pagemap.h>
 #include <linux/dax.h>
 #include <linux/quotaops.h>
+#include <linux/iomap.h>
+#include <linux/uio.h>
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
 
 #ifdef CONFIG_FS_DAX
+static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	ssize_t ret;
+
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+	inode_lock_shared(inode);
+	ret = iomap_dax_rw(iocb, to, &ext2_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+
+static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	ssize_t ret;
+
+	inode_lock(inode);
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out_unlock;
+	ret = file_remove_privs(file);
+	if (ret)
+		goto out_unlock;
+	ret = file_update_time(file);
+	if (ret)
+		goto out_unlock;
+
+	ret = iomap_dax_rw(iocb, from, &ext2_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		mark_inode_dirty(inode);
+	}
+
+out_unlock:
+	inode_unlock(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+
 /*
  * The lock ordering for ext2 DAX fault paths is:
 *
@@ -51,7 +99,7 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 	down_read(&ei->dax_sem);
 
-	ret = dax_fault(vma, vmf, ext2_get_block);
+	ret = iomap_dax_fault(vma, vmf, &ext2_iomap_ops);
 
 	up_read(&ei->dax_sem);
 	if (vmf->flags & FAULT_FLAG_WRITE)
@@ -156,14 +204,28 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	return ret;
 }
 
-/*
- * We have mostly NULL's here: the current defaults are ok for
- * the ext2 filesystem.
- */
+static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_read_iter(iocb, to);
+#endif
+	return generic_file_read_iter(iocb, to);
+}
+
+static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(iocb->ki_filp->f_mapping->host))
+		return ext2_dax_write_iter(iocb, from);
+#endif
+	return generic_file_write_iter(iocb, from);
+}
+
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
-	.read_iter	= generic_file_read_iter,
-	.write_iter	= generic_file_write_iter,
+	.read_iter	= ext2_file_read_iter,
+	.write_iter	= ext2_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
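
The dispatch above is the pattern any filesystem adopting the new helpers would follow: route DAX inodes to iomap_dax_rw under the appropriate inode lock, and fall back to the generic paths otherwise. A condensed sketch with hypothetical myfs_* names (kernel-style C, illustrative only, not compilable on its own):

/* Hypothetical filesystem wiring; myfs_dax_read_iter would mirror
 * ext2_dax_read_iter above. */
static ssize_t myfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
	/* DAX inodes bypass the page cache entirely. */
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return myfs_dax_read_iter(iocb, to);
#endif
	return generic_file_read_iter(iocb, to);
}
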
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index d5c7d09919f3..c7dbb4661119 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -32,6 +32,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mpage.h>
 #include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/namei.h>
 #include <linux/uio.h>
 #include "ext2.h"
@@ -618,7 +619,7 @@ static void ext2_splice_branch(struct inode *inode,
  */
 static int ext2_get_blocks(struct inode *inode,
 			   sector_t iblock, unsigned long maxblocks,
-			   struct buffer_head *bh_result,
+			   u32 *bno, bool *new, bool *boundary,
 			   int create)
 {
 	int err = -EIO;
@@ -644,7 +645,6 @@ static int ext2_get_blocks(struct inode *inode,
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		first_block = le32_to_cpu(chain[depth - 1].key);
-		clear_buffer_new(bh_result); /* What's this do? */
 		count++;
 		/*map more blocks*/
 		while (count < maxblocks && count <= blocks_to_boundary) {
@@ -699,7 +699,6 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			if (err)
 				goto cleanup;
-			clear_buffer_new(bh_result);
 			goto got_it;
 		}
 	}
@@ -745,15 +744,16 @@ static int ext2_get_blocks(struct inode *inode,
 			mutex_unlock(&ei->truncate_mutex);
 			goto cleanup;
 		}
-	} else
-		set_buffer_new(bh_result);
+	} else {
+		*new = true;
+	}
 
 	ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
 	mutex_unlock(&ei->truncate_mutex);
 got_it:
-	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	*bno = le32_to_cpu(chain[depth-1].key);
 	if (count > blocks_to_boundary)
-		set_buffer_boundary(bh_result);
+		*boundary = true;
 	err = count;
 	/* Clean up and exit */
 	partial = chain + depth - 1;	/* the whole chain */
@@ -765,19 +765,82 @@ cleanup:
 	return err;
 }
 
-int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
+int ext2_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
 {
 	unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
-	int ret = ext2_get_blocks(inode, iblock, max_blocks,
-			      bh_result, create);
-	if (ret > 0) {
-		bh_result->b_size = (ret << inode->i_blkbits);
-		ret = 0;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
+			create);
+	if (ret <= 0)
+		return ret;
+
+	map_bh(bh_result, inode->i_sb, bno);
+	bh_result->b_size = (ret << inode->i_blkbits);
+	if (new)
+		set_buffer_new(bh_result);
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	return 0;
+
+}
+
+#ifdef CONFIG_FS_DAX
+static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned flags, struct iomap *iomap)
+{
+	unsigned int blkbits = inode->i_blkbits;
+	unsigned long first_block = offset >> blkbits;
+	unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
+	bool new = false, boundary = false;
+	u32 bno;
+	int ret;
+
+	ret = ext2_get_blocks(inode, first_block, max_blocks,
+			&bno, &new, &boundary, flags & IOMAP_WRITE);
+	if (ret < 0)
+		return ret;
+
+	iomap->flags = 0;
+	iomap->bdev = inode->i_sb->s_bdev;
+	iomap->offset = (u64)first_block << blkbits;
+
+	if (ret == 0) {
+		iomap->type = IOMAP_HOLE;
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->length = 1 << blkbits;
+	} else {
+		iomap->type = IOMAP_MAPPED;
+		iomap->blkno = (sector_t)bno << (blkbits - 9);
+		iomap->length = (u64)ret << blkbits;
+		iomap->flags |= IOMAP_F_MERGED;
 	}
-	return ret;
 
+	if (new)
+		iomap->flags |= IOMAP_F_NEW;
+	return 0;
 }
 
+static int
+ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
+		ssize_t written, unsigned flags, struct iomap *iomap)
+{
+	if (iomap->type == IOMAP_MAPPED &&
+	    written < length &&
+	    (flags & IOMAP_WRITE))
+		ext2_write_failed(inode->i_mapping, offset + length);
+	return 0;
+}
+
+struct iomap_ops ext2_iomap_ops = {
+	.iomap_begin		= ext2_iomap_begin,
+	.iomap_end		= ext2_iomap_end,
+};
+#endif /* CONFIG_FS_DAX */
+
 int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
@@ -863,11 +926,10 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (IS_DAX(inode))
-		ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
-				DIO_LOCKING);
-	else
-		ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
+	if (WARN_ON_ONCE(IS_DAX(inode)))
+		return -EIO;
+
+	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
 	return ret;
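
To see how ext2_iomap_begin translates the result of ext2_get_blocks into iomap fields, consider a worked example (standalone userspace C; every value below is invented, not taken from the patch):

/*
 * ext2_iomap_begin field translation with sample numbers: 4 KiB blocks
 * (blkbits = 12), 4 blocks mapped starting at filesystem block 1000.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned blkbits = 12;		/* 4096-byte filesystem blocks */
	uint64_t first_block = 520;	/* offset >> blkbits */
	uint32_t bno = 1000;		/* first mapped block number */
	int ret = 4;			/* blocks mapped by ext2_get_blocks */

	uint64_t offset = (uint64_t)first_block << blkbits;
	uint64_t blkno  = (uint64_t)bno << (blkbits - 9);	/* sectors */
	uint64_t length = (uint64_t)ret << blkbits;		/* bytes */

	/* prints: offset=2129920 blkno=8000 length=16384 */
	printf("offset=%llu blkno=%llu length=%llu\n",
	       (unsigned long long)offset, (unsigned long long)blkno,
	       (unsigned long long)length);
	return 0;
}
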
diff --git a/fs/internal.h b/fs/internal.h
index ba0737649d4a..859178692ce4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
 struct super_block;
 struct file_system_type;
 struct iomap;
+struct iomap_ops;
 struct linux_binprm;
 struct path;
 struct mount;
@@ -164,3 +165,13 @@ extern struct dentry_operations ns_dentry_operations;
 extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
 		    unsigned long arg);
 extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+
+/*
+ * iomap support:
+ */
+typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
+		void *data, struct iomap *iomap);
+
+loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
+		unsigned flags, struct iomap_ops *ops, void *data,
+		iomap_actor_t actor);
diff --git a/fs/iomap.c b/fs/iomap.c
index ec411a6b9edc..013d1d36fbbf 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -27,9 +27,6 @@
 #include <linux/dax.h>
 #include "internal.h"
 
-typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
-		void *data, struct iomap *iomap);
-
 /*
  * Execute a iomap write on a segment of the mapping that spans a
  * contiguous range of pages that have identical block mapping state.
@@ -41,7 +38,7 @@ typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
  * resources they require in the iomap_begin call, and release them in the
  * iomap_end call.
  */
-static loff_t
+loff_t
 iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags,
 	struct iomap_ops *ops, void *data, iomap_actor_t actor)
 {
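
Exporting iomap_apply gives fs/dax.c access to the same begin/actor/end contract the buffered-I/O helpers use: ->iomap_begin returns one extent, the actor consumes as much of it as it can and returns the byte count, and ->iomap_end releases whatever the filesystem reserved. A kernel-style sketch of a caller (illustrative only; demo_actor and demo_walk are invented names):

/* Walk an arbitrary range one extent at a time, as iomap_dax_rw does. */
static loff_t demo_actor(struct inode *inode, loff_t pos, loff_t len,
		void *data, struct iomap *iomap)
{
	/* Process up to len bytes of this extent; return bytes handled. */
	return len;
}

static loff_t demo_walk(struct inode *inode, loff_t pos, loff_t length,
		struct iomap_ops *ops)
{
	loff_t done = 0, ret;

	while (length > 0) {
		ret = iomap_apply(inode, pos, length, 0, ops, NULL,
				demo_actor);
		if (ret <= 0)
			return done ? done : ret;
		pos += ret;
		length -= ret;
		done += ret;
	}
	return done;
}
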
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc3ad15..4a28fa91e3b1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -200,7 +200,7 @@ xfs_setfilesize_trans_alloc(
  * Update on-disk file size now that data has been written to disk.
  */
 STATIC int
-xfs_setfilesize(
+__xfs_setfilesize(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp,
 	xfs_off_t		offset,
@@ -225,6 +225,23 @@ xfs_setfilesize(
 	return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	size_t			size)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+	if (error)
+		return error;
+
+	return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
 	struct xfs_ioend	*ioend,
@@ -247,7 +264,7 @@ xfs_setfilesize_ioend(
 		return error;
 	}
 
-	return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -1336,13 +1353,12 @@ xfs_end_io_direct_write(
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
 	uintptr_t		flags = (uintptr_t)private;
 	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
-	if (XFS_FORCED_SHUTDOWN(mp))
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
 	if (size <= 0)
@@ -1380,14 +1396,9 @@ xfs_end_io_direct_write(
 
 		error = xfs_iomap_write_unwritten(ip, offset, size);
 	} else if (flags & XFS_DIO_FLAG_APPEND) {
-		struct xfs_trans *tp;
-
 		trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
-				&tp);
-		if (!error)
-			error = xfs_setfilesize(ip, tp, offset, size);
+		error = xfs_setfilesize(ip, offset, size);
 	}
 
 	return error;
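
The refactor splits size updates in two: __xfs_setfilesize keeps the old calling convention for paths that already own a transaction (the ioend path), while the exported xfs_setfilesize allocates and commits its own. A hypothetical caller (illustrative only; demo_extend_ondisk_size is an invented name):

/* After a size-extending synchronous write, update the on-disk size
 * without open-coding xfs_trans_alloc(). */
static int demo_extend_ondisk_size(struct xfs_inode *ip, xfs_off_t offset,
		size_t size)
{
	return xfs_setfilesize(ip, offset, size);
}
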
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index bf2d9a141a73..1950e3bca2ac 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,6 +62,7 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 
 int	xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
 		ssize_t size, void *private);
+int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b927ea9abe33..c68517b0f248 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -333,10 +333,7 @@ xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct iov_iter		data = *to;
+	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
 	size_t			count = iov_iter_count(to);
 	ssize_t			ret = 0;
 
@@ -346,11 +343,7 @@ xfs_file_dax_read(
 		return 0; /* skip atime */
 
 	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(to, ret);
-	}
+	ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
 	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	file_accessed(iocb->ki_filp);
@@ -712,70 +705,32 @@ xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
 {
-	struct address_space	*mapping = iocb->ki_filp->f_mapping;
-	struct inode		*inode = mapping->host;
+	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	ssize_t			ret = 0;
-	int			unaligned_io = 0;
-	int			iolock;
-	struct iov_iter		data;
+	int			iolock = XFS_IOLOCK_EXCL;
+	ssize_t			ret, error = 0;
+	size_t			count;
+	loff_t			pos;
 
-	/* "unaligned" here means not aligned to a filesystem block */
-	if ((iocb->ki_pos & mp->m_blockmask) ||
-	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
-		unaligned_io = 1;
-		iolock = XFS_IOLOCK_EXCL;
-	} else if (mapping->nrpages) {
-		iolock = XFS_IOLOCK_EXCL;
-	} else {
-		iolock = XFS_IOLOCK_SHARED;
-	}
 	xfs_rw_ilock(ip, iolock);
-
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
 		goto out;
 
-	/*
-	 * Yes, even DAX files can have page cache attached to them:  A zeroed
-	 * page is inserted into the pagecache when we have to serve a write
-	 * fault on a hole.  It should never be dirtied and can simply be
-	 * dropped from the pagecache once we get real data for the page.
-	 *
-	 * XXX: This is racy against mmap, and there's nothing we can do about
-	 * it. dax_do_io() should really do this invalidation internally as
-	 * it will know if we've allocated over a holei for this specific IO and
-	 * if so it needs to update the mapping tree and invalidate existing
-	 * PTEs over the newly allocated range. Remove this invalidation when
-	 * dax_do_io() is fixed up.
-	 */
-	if (mapping->nrpages) {
-		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
+	pos = iocb->ki_pos;
+	count = iov_iter_count(from);
 
-		ret = invalidate_inode_pages2_range(mapping,
-						    iocb->ki_pos >> PAGE_SHIFT,
-						    end >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
+	trace_xfs_file_dax_write(ip, count, pos);
 
-	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
-		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
-		iolock = XFS_IOLOCK_SHARED;
+	ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
+	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
+		i_size_write(inode, iocb->ki_pos);
+		error = xfs_setfilesize(ip, pos, ret);
 	}
 
-	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
-
-	data = *from;
-	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
-			xfs_end_io_direct_write, 0);
-	if (ret > 0) {
-		iocb->ki_pos += ret;
-		iov_iter_advance(from, ret);
-	}
 out:
 	xfs_rw_iunlock(ip, iolock);
-	return ret;
+	return error ? error : ret;
 }
 
 STATIC ssize_t
@@ -1514,7 +1469,7 @@ xfs_filemap_page_mkwrite(
 	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 
 	if (IS_DAX(inode)) {
-		ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else {
 		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
@@ -1548,7 +1503,7 @@ xfs_filemap_fault(
 		 * changes to xfs_get_blocks_direct() to map unwritten extent
 		 * ioend for conversion on read-only mappings.
 		 */
-		ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
+		ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
 	} else
 		ret = filemap_fault(vma, vmf);
 	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
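
Note what the write path lost: the old code chose a shared or exclusive iolock based on whether the write was filesystem-block aligned, while the new path simply takes XFS_IOLOCK_EXCL and lets iomap_dax_rw handle page-cache invalidation. For reference, a standalone demo of the alignment test that was deleted (userspace C; sample numbers invented):

/* m_blockmask is blocksize - 1; a write is "unaligned" if either end
 * falls inside a filesystem block. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t blockmask = 4096 - 1;	/* 4 KiB filesystem blocks */
	uint64_t pos = 8192, count = 512;

	int unaligned = (pos & blockmask) ||
			((pos + count) & blockmask);

	printf("unaligned = %d\n", unaligned);	/* 1: the end is mid-block */
	return 0;
}
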
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f96c8ffce5f4..c08253e11545 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -934,11 +934,13 @@ error_on_bmapi_transaction:
 	return error;
 }
 
-static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+static inline bool imap_needs_alloc(struct inode *inode,
+		struct xfs_bmbt_irec *imap, int nimaps)
 {
 	return !nimaps ||
 		imap->br_startblock == HOLESTARTBLOCK ||
-		imap->br_startblock == DELAYSTARTBLOCK;
+		imap->br_startblock == DELAYSTARTBLOCK ||
+		(IS_DAX(inode) && ISUNWRITTEN(imap));
 }
 
 static int
@@ -954,16 +956,18 @@ xfs_file_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		offset_fsb, end_fsb;
 	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
-	if ((flags & IOMAP_WRITE) && !xfs_get_extsz_hint(ip)) {
+	if ((flags & IOMAP_WRITE) &&
+	    !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
 		return xfs_file_iomap_begin_delay(inode, offset, length, flags,
 				iomap);
 	}
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -974,11 +978,11 @@ xfs_file_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ENTIRE);
 	if (error) {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return error;
 	}
 
-	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
 		/*
 		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 		 * pages to keep the chunks of work done where somewhat symmetric
@@ -994,17 +998,19 @@ xfs_file_iomap_begin(
 		 * xfs_iomap_write_direct() expects the shared lock. It
 		 * is unlocked on return.
 		 */
-		xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 		error = xfs_iomap_write_direct(ip, offset, length, &imap,
 				nimaps);
 		if (error)
 			return error;
 
+		iomap->flags = IOMAP_F_NEW;
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
 	} else {
 		ASSERT(nimaps);
 
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
 	}
 
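
The extra imap_needs_alloc condition reflects a DAX constraint: with no I/O completion callback, the DAX path cannot convert an unwritten extent to written after the data lands, so it must treat unwritten extents like holes and allocate up front. A standalone sketch of the predicate (userspace C; the function name is invented):

#include <stdbool.h>
#include <stdio.h>

/* Mirror of the extended imap_needs_alloc logic. */
static bool needs_alloc(bool have_map, bool hole, bool delalloc,
			bool is_dax, bool unwritten)
{
	return !have_map || hole || delalloc || (is_dax && unwritten);
}

int main(void)
{
	/* DAX over an unwritten extent: allocate (prints 1). */
	printf("%d\n", needs_alloc(true, false, false, true, true));
	/* Non-DAX I/O over the same extent: no allocation needed (prints 0). */
	printf("%d\n", needs_alloc(true, false, false, false, true));
	return 0;
}
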
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9c6dc7704043..add6c4bc568f 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -6,13 +6,19 @@
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+struct iomap_ops;
+
 /* We use lowest available exceptional entry bit for locking */
 #define RADIX_DAX_ENTRY_LOCK (1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
 
+ssize_t iomap_dax_rw(struct kiocb *iocb, struct iov_iter *iter,
+		struct iomap_ops *ops);
 ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *,
 		  get_block_t, dio_iodone_t, int flags);
 int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
 int dax_truncate_page(struct inode *, loff_t from, get_block_t);
+int iomap_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+		struct iomap_ops *ops);
 int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t);
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 void dax_wake_mapping_entry_waiter(struct address_space *mapping,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index c74226a738a3..e63e288dee83 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -23,6 +23,7 @@ struct vm_fault;
  */
 #define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
 #define IOMAP_F_SHARED	0x02	/* block shared with another file */
+#define IOMAP_F_NEW	0x04	/* blocks have been newly allocated */
 
 /*
  * Magic value for blkno:
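
IOMAP_F_NEW closes the loop with the fault handler in fs/dax.c: a filesystem sets it in ->iomap_begin when it allocated blocks, and iomap_dax_fault uses it to account the fault as major. A standalone demo of testing the flag bits (userspace C; the flag values are copied from the header above):

#include <stdio.h>

#define IOMAP_F_MERGED	0x01
#define IOMAP_F_SHARED	0x02
#define IOMAP_F_NEW	0x04

int main(void)
{
	unsigned flags = IOMAP_F_MERGED | IOMAP_F_NEW;

	if (flags & IOMAP_F_NEW)
		printf("newly allocated: account a major fault\n");
	return 0;
}
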