diff options
Diffstat (limited to 'fs/ext4/file.c')
| -rw-r--r-- | fs/ext4/file.c | 270 |
1 files changed, 161 insertions, 109 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 513c12cf444c..33a09da16c9c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -95,7 +95,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
| 95 | struct inode *inode = file_inode(iocb->ki_filp); | 95 | struct inode *inode = file_inode(iocb->ki_filp); |
| 96 | struct mutex *aio_mutex = NULL; | 96 | struct mutex *aio_mutex = NULL; |
| 97 | struct blk_plug plug; | 97 | struct blk_plug plug; |
| 98 | int o_direct = file->f_flags & O_DIRECT; | 98 | int o_direct = io_is_direct(file); |
| 99 | int overwrite = 0; | 99 | int overwrite = 0; |
| 100 | size_t length = iov_iter_count(from); | 100 | size_t length = iov_iter_count(from); |
| 101 | ssize_t ret; | 101 | ssize_t ret; |
| @@ -191,17 +191,41 @@ errout: | |||
| 191 | return ret; | 191 | return ret; |
| 192 | } | 192 | } |
| 193 | 193 | ||
| 194 | #ifdef CONFIG_FS_DAX | ||
| 195 | static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
| 196 | { | ||
| 197 | return dax_fault(vma, vmf, ext4_get_block); | ||
| 198 | /* Is this the right get_block? */ | ||
| 199 | } | ||
| 200 | |||
| 201 | static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
| 202 | { | ||
| 203 | return dax_mkwrite(vma, vmf, ext4_get_block); | ||
| 204 | } | ||
| 205 | |||
| 206 | static const struct vm_operations_struct ext4_dax_vm_ops = { | ||
| 207 | .fault = ext4_dax_fault, | ||
| 208 | .page_mkwrite = ext4_dax_mkwrite, | ||
| 209 | }; | ||
| 210 | #else | ||
| 211 | #define ext4_dax_vm_ops ext4_file_vm_ops | ||
| 212 | #endif | ||
| 213 | |||
| 194 | static const struct vm_operations_struct ext4_file_vm_ops = { | 214 | static const struct vm_operations_struct ext4_file_vm_ops = { |
| 195 | .fault = filemap_fault, | 215 | .fault = filemap_fault, |
| 196 | .map_pages = filemap_map_pages, | 216 | .map_pages = filemap_map_pages, |
| 197 | .page_mkwrite = ext4_page_mkwrite, | 217 | .page_mkwrite = ext4_page_mkwrite, |
| 198 | .remap_pages = generic_file_remap_pages, | ||
| 199 | }; | 218 | }; |
| 200 | 219 | ||
| 201 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | 220 | static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) |
| 202 | { | 221 | { |
| 203 | file_accessed(file); | 222 | file_accessed(file); |
| 204 | vma->vm_ops = &ext4_file_vm_ops; | 223 | if (IS_DAX(file_inode(file))) { |
| 224 | vma->vm_ops = &ext4_dax_vm_ops; | ||
| 225 | vma->vm_flags |= VM_MIXEDMAP; | ||
| 226 | } else { | ||
| 227 | vma->vm_ops = &ext4_file_vm_ops; | ||
| 228 | } | ||
| 205 | return 0; | 229 | return 0; |
| 206 | } | 230 | } |
| 207 | 231 | ||
| @@ -273,19 +297,24 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
| 273 | * we determine this extent as a data or a hole according to whether the | 297 | * we determine this extent as a data or a hole according to whether the |
| 274 | * page cache has data or not. | 298 | * page cache has data or not. |
| 275 | */ | 299 | */ |
| 276 | static int ext4_find_unwritten_pgoff(struct inode *inode, int whence, | 300 | static int ext4_find_unwritten_pgoff(struct inode *inode, |
| 277 | loff_t endoff, loff_t *offset) | 301 | int whence, |
| 302 | struct ext4_map_blocks *map, | ||
| 303 | loff_t *offset) | ||
| 278 | { | 304 | { |
| 279 | struct pagevec pvec; | 305 | struct pagevec pvec; |
| 306 | unsigned int blkbits; | ||
| 280 | pgoff_t index; | 307 | pgoff_t index; |
| 281 | pgoff_t end; | 308 | pgoff_t end; |
| 309 | loff_t endoff; | ||
| 282 | loff_t startoff; | 310 | loff_t startoff; |
| 283 | loff_t lastoff; | 311 | loff_t lastoff; |
| 284 | int found = 0; | 312 | int found = 0; |
| 285 | 313 | ||
| 314 | blkbits = inode->i_sb->s_blocksize_bits; | ||
| 286 | startoff = *offset; | 315 | startoff = *offset; |
| 287 | lastoff = startoff; | 316 | lastoff = startoff; |
| 288 | 317 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; | |
| 289 | 318 | ||
| 290 | index = startoff >> PAGE_CACHE_SHIFT; | 319 | index = startoff >> PAGE_CACHE_SHIFT; |
| 291 | end = endoff >> PAGE_CACHE_SHIFT; | 320 | end = endoff >> PAGE_CACHE_SHIFT; |
| @@ -403,144 +432,147 @@ out: | |||
| 403 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | 432 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) |
| 404 | { | 433 | { |
| 405 | struct inode *inode = file->f_mapping->host; | 434 | struct inode *inode = file->f_mapping->host; |
| 406 | struct fiemap_extent_info fie; | 435 | struct ext4_map_blocks map; |
| 407 | struct fiemap_extent ext[2]; | 436 | struct extent_status es; |
| 408 | loff_t next; | 437 | ext4_lblk_t start, last, end; |
| 409 | int i, ret = 0; | 438 | loff_t dataoff, isize; |
| 439 | int blkbits; | ||
| 440 | int ret = 0; | ||
| 410 | 441 | ||
| 411 | mutex_lock(&inode->i_mutex); | 442 | mutex_lock(&inode->i_mutex); |
| 412 | if (offset >= inode->i_size) { | 443 | |
| 444 | isize = i_size_read(inode); | ||
| 445 | if (offset >= isize) { | ||
| 413 | mutex_unlock(&inode->i_mutex); | 446 | mutex_unlock(&inode->i_mutex); |
| 414 | return -ENXIO; | 447 | return -ENXIO; |
| 415 | } | 448 | } |
| 416 | fie.fi_flags = 0; | 449 | |
| 417 | fie.fi_extents_max = 2; | 450 | blkbits = inode->i_sb->s_blocksize_bits; |
| 418 | fie.fi_extents_start = (struct fiemap_extent __user *) &ext; | 451 | start = offset >> blkbits; |
| 419 | while (1) { | 452 | last = start; |
| 420 | mm_segment_t old_fs = get_fs(); | 453 | end = isize >> blkbits; |
| 421 | 454 | dataoff = offset; | |
| 422 | fie.fi_extents_mapped = 0; | 455 | |
| 423 | memset(ext, 0, sizeof(*ext) * fie.fi_extents_max); | 456 | do { |
| 424 | 457 | map.m_lblk = last; | |
| 425 | set_fs(get_ds()); | 458 | map.m_len = end - last + 1; |
| 426 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); | 459 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
| 427 | set_fs(old_fs); | 460 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
| 428 | if (ret) | 461 | if (last != start) |
| 462 | dataoff = (loff_t)last << blkbits; | ||
| 429 | break; | 463 | break; |
| 464 | } | ||
| 430 | 465 | ||
| 431 | /* No extents found, EOF */ | 466 | /* |
| 432 | if (!fie.fi_extents_mapped) { | 467 | * If there is a delay extent at this offset, |
| 433 | ret = -ENXIO; | 468 | * it will be as a data. |
| 469 | */ | ||
| 470 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
| 471 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
| 472 | if (last != start) | ||
| 473 | dataoff = (loff_t)last << blkbits; | ||
| 434 | break; | 474 | break; |
| 435 | } | 475 | } |
| 436 | for (i = 0; i < fie.fi_extents_mapped; i++) { | ||
| 437 | next = (loff_t)(ext[i].fe_length + ext[i].fe_logical); | ||
| 438 | 476 | ||
| 439 | if (offset < (loff_t)ext[i].fe_logical) | 477 | /* |
| 440 | offset = (loff_t)ext[i].fe_logical; | 478 | * If there is a unwritten extent at this offset, |
| 441 | /* | 479 | * it will be as a data or a hole according to page |
| 442 | * If extent is not unwritten, then it contains valid | 480 | * cache that has data or not. |
| 443 | * data, mapped or delayed. | 481 | */ |
| 444 | */ | 482 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { |
| 445 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) | 483 | int unwritten; |
| 446 | goto out; | 484 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, |
| 485 | &map, &dataoff); | ||
| 486 | if (unwritten) | ||
| 487 | break; | ||
| 488 | } | ||
| 447 | 489 | ||
| 448 | /* | 490 | last++; |
| 449 | * If there is a unwritten extent at this offset, | 491 | dataoff = (loff_t)last << blkbits; |
| 450 | * it will be as a data or a hole according to page | 492 | } while (last <= end); |
| 451 | * cache that has data or not. | ||
| 452 | */ | ||
| 453 | if (ext4_find_unwritten_pgoff(inode, SEEK_DATA, | ||
| 454 | next, &offset)) | ||
| 455 | goto out; | ||
| 456 | 493 | ||
| 457 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) { | ||
| 458 | ret = -ENXIO; | ||
| 459 | goto out; | ||
| 460 | } | ||
| 461 | offset = next; | ||
| 462 | } | ||
| 463 | } | ||
| 464 | if (offset > inode->i_size) | ||
| 465 | offset = inode->i_size; | ||
| 466 | out: | ||
| 467 | mutex_unlock(&inode->i_mutex); | 494 | mutex_unlock(&inode->i_mutex); |
| 468 | if (ret) | ||
| 469 | return ret; | ||
| 470 | 495 | ||
| 471 | return vfs_setpos(file, offset, maxsize); | 496 | if (dataoff > isize) |
| 497 | return -ENXIO; | ||
| 498 | |||
| 499 | return vfs_setpos(file, dataoff, maxsize); | ||
| 472 | } | 500 | } |
| 473 | 501 | ||
| 474 | /* | 502 | /* |
| 475 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE | 503 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE. |
| 476 | */ | 504 | */ |
| 477 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | 505 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) |
| 478 | { | 506 | { |
| 479 | struct inode *inode = file->f_mapping->host; | 507 | struct inode *inode = file->f_mapping->host; |
| 480 | struct fiemap_extent_info fie; | 508 | struct ext4_map_blocks map; |
| 481 | struct fiemap_extent ext[2]; | 509 | struct extent_status es; |
| 482 | loff_t next; | 510 | ext4_lblk_t start, last, end; |
| 483 | int i, ret = 0; | 511 | loff_t holeoff, isize; |
| 512 | int blkbits; | ||
| 513 | int ret = 0; | ||
| 484 | 514 | ||
| 485 | mutex_lock(&inode->i_mutex); | 515 | mutex_lock(&inode->i_mutex); |
| 486 | if (offset >= inode->i_size) { | 516 | |
| 517 | isize = i_size_read(inode); | ||
| 518 | if (offset >= isize) { | ||
| 487 | mutex_unlock(&inode->i_mutex); | 519 | mutex_unlock(&inode->i_mutex); |
| 488 | return -ENXIO; | 520 | return -ENXIO; |
| 489 | } | 521 | } |
| 490 | 522 | ||
| 491 | fie.fi_flags = 0; | 523 | blkbits = inode->i_sb->s_blocksize_bits; |
| 492 | fie.fi_extents_max = 2; | 524 | start = offset >> blkbits; |
| 493 | fie.fi_extents_start = (struct fiemap_extent __user *)&ext; | 525 | last = start; |
| 494 | while (1) { | 526 | end = isize >> blkbits; |
| 495 | mm_segment_t old_fs = get_fs(); | 527 | holeoff = offset; |
| 496 | 528 | ||
| 497 | fie.fi_extents_mapped = 0; | 529 | do { |
| 498 | memset(ext, 0, sizeof(*ext)); | 530 | map.m_lblk = last; |
| 499 | 531 | map.m_len = end - last + 1; | |
| 500 | set_fs(get_ds()); | 532 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
| 501 | ret = ext4_fiemap(inode, &fie, offset, maxsize - offset); | 533 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
| 502 | set_fs(old_fs); | 534 | last += ret; |
| 503 | if (ret) | 535 | holeoff = (loff_t)last << blkbits; |
| 504 | break; | 536 | continue; |
| 537 | } | ||
| 505 | 538 | ||
| 506 | /* No extents found */ | 539 | /* |
| 507 | if (!fie.fi_extents_mapped) | 540 | * If there is a delay extent at this offset, |
| 508 | break; | 541 | * we will skip this extent. |
| 542 | */ | ||
| 543 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | ||
| 544 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | ||
| 545 | last = es.es_lblk + es.es_len; | ||
| 546 | holeoff = (loff_t)last << blkbits; | ||
| 547 | continue; | ||
| 548 | } | ||
| 509 | 549 | ||
| 510 | for (i = 0; i < fie.fi_extents_mapped; i++) { | 550 | /* |
| 511 | next = (loff_t)(ext[i].fe_logical + ext[i].fe_length); | 551 | * If there is a unwritten extent at this offset, |
| 512 | /* | 552 | * it will be as a data or a hole according to page |
| 513 | * If extent is not unwritten, then it contains valid | 553 | * cache that has data or not. |
| 514 | * data, mapped or delayed. | 554 | */ |
| 515 | */ | 555 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { |
| 516 | if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) { | 556 | int unwritten; |
| 517 | if (offset < (loff_t)ext[i].fe_logical) | 557 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, |
| 518 | goto out; | 558 | &map, &holeoff); |
| 519 | offset = next; | 559 | if (!unwritten) { |
| 560 | last += ret; | ||
| 561 | holeoff = (loff_t)last << blkbits; | ||
| 520 | continue; | 562 | continue; |
| 521 | } | 563 | } |
| 522 | /* | ||
| 523 | * If there is a unwritten extent at this offset, | ||
| 524 | * it will be as a data or a hole according to page | ||
| 525 | * cache that has data or not. | ||
| 526 | */ | ||
| 527 | if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | ||
| 528 | next, &offset)) | ||
| 529 | goto out; | ||
| 530 | |||
| 531 | offset = next; | ||
| 532 | if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) | ||
| 533 | goto out; | ||
| 534 | } | 564 | } |
| 535 | } | 565 | |
| 536 | if (offset > inode->i_size) | 566 | /* find a hole */ |
| 537 | offset = inode->i_size; | 567 | break; |
| 538 | out: | 568 | } while (last <= end); |
| 569 | |||
| 539 | mutex_unlock(&inode->i_mutex); | 570 | mutex_unlock(&inode->i_mutex); |
| 540 | if (ret) | ||
| 541 | return ret; | ||
| 542 | 571 | ||
| 543 | return vfs_setpos(file, offset, maxsize); | 572 | if (holeoff > isize) |
| 573 | holeoff = isize; | ||
| 574 | |||
| 575 | return vfs_setpos(file, holeoff, maxsize); | ||
| 544 | } | 576 | } |
| 545 | 577 | ||
| 546 | /* | 578 | /* |
| @@ -592,6 +624,26 @@ const struct file_operations ext4_file_operations = { | |||
| 592 | .fallocate = ext4_fallocate, | 624 | .fallocate = ext4_fallocate, |
| 593 | }; | 625 | }; |
| 594 | 626 | ||
| 627 | #ifdef CONFIG_FS_DAX | ||
| 628 | const struct file_operations ext4_dax_file_operations = { | ||
| 629 | .llseek = ext4_llseek, | ||
| 630 | .read = new_sync_read, | ||
| 631 | .write = new_sync_write, | ||
| 632 | .read_iter = generic_file_read_iter, | ||
| 633 | .write_iter = ext4_file_write_iter, | ||
| 634 | .unlocked_ioctl = ext4_ioctl, | ||
| 635 | #ifdef CONFIG_COMPAT | ||
| 636 | .compat_ioctl = ext4_compat_ioctl, | ||
| 637 | #endif | ||
| 638 | .mmap = ext4_file_mmap, | ||
| 639 | .open = ext4_file_open, | ||
| 640 | .release = ext4_release_file, | ||
| 641 | .fsync = ext4_sync_file, | ||
| 642 | /* Splice not yet supported with DAX */ | ||
| 643 | .fallocate = ext4_fallocate, | ||
| 644 | }; | ||
| 645 | #endif | ||
| 646 | |||
| 595 | const struct inode_operations ext4_file_inode_operations = { | 647 | const struct inode_operations ext4_file_inode_operations = { |
| 596 | .setattr = ext4_setattr, | 648 | .setattr = ext4_setattr, |
| 597 | .getattr = ext4_getattr, | 649 | .getattr = ext4_getattr, |
