diff options
Diffstat (limited to 'fs/ocfs2/aops.c')
| -rw-r--r-- | fs/ocfs2/aops.c | 242 |
1 files changed, 236 insertions, 6 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 46d93e941f3d..44db1808cdb5 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/pipe_fs_i.h> | 28 | #include <linux/pipe_fs_i.h> |
| 29 | #include <linux/mpage.h> | 29 | #include <linux/mpage.h> |
| 30 | #include <linux/quotaops.h> | 30 | #include <linux/quotaops.h> |
| 31 | #include <linux/blkdev.h> | ||
| 31 | 32 | ||
| 32 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
| 33 | 34 | ||
| @@ -47,6 +48,9 @@ | |||
| 47 | #include "ocfs2_trace.h" | 48 | #include "ocfs2_trace.h" |
| 48 | 49 | ||
| 49 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
| 51 | #include "dir.h" | ||
| 52 | #include "namei.h" | ||
| 53 | #include "sysfile.h" | ||
| 50 | 54 | ||
| 51 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | 55 | static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, |
| 52 | struct buffer_head *bh_result, int create) | 56 | struct buffer_head *bh_result, int create) |
| @@ -506,18 +510,21 @@ bail: | |||
| 506 | * | 510 | * |
| 507 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | 511 | * called like this: dio->get_blocks(dio->inode, fs_startblk, |
| 508 | * fs_count, map_bh, dio->rw == WRITE); | 512 | * fs_count, map_bh, dio->rw == WRITE); |
| 509 | * | ||
| 510 | * Note that we never bother to allocate blocks here, and thus ignore the | ||
| 511 | * create argument. | ||
| 512 | */ | 513 | */ |
| 513 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | 514 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, |
| 514 | struct buffer_head *bh_result, int create) | 515 | struct buffer_head *bh_result, int create) |
| 515 | { | 516 | { |
| 516 | int ret; | 517 | int ret; |
| 518 | u32 cpos = 0; | ||
| 519 | int alloc_locked = 0; | ||
| 517 | u64 p_blkno, inode_blocks, contig_blocks; | 520 | u64 p_blkno, inode_blocks, contig_blocks; |
| 518 | unsigned int ext_flags; | 521 | unsigned int ext_flags; |
| 519 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | 522 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; |
| 520 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; | 523 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; |
| 524 | unsigned long len = bh_result->b_size; | ||
| 525 | unsigned int clusters_to_alloc = 0; | ||
| 526 | |||
| 527 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock); | ||
| 521 | 528 | ||
| 522 | /* This function won't even be called if the request isn't all | 529 | /* This function won't even be called if the request isn't all |
| 523 | * nicely aligned and of the right size, so there's no need | 530 | * nicely aligned and of the right size, so there's no need |
| @@ -539,6 +546,40 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
| 539 | /* We should already CoW the refcounted extent in case of create. */ | 546 | /* We should already CoW the refcounted extent in case of create. */ |
| 540 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); | 547 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); |
| 541 | 548 | ||
| 549 | /* allocate blocks if no p_blkno is found, and create == 1 */ | ||
| 550 | if (!p_blkno && create) { | ||
| 551 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
| 552 | if (ret < 0) { | ||
| 553 | mlog_errno(ret); | ||
| 554 | goto bail; | ||
| 555 | } | ||
| 556 | |||
| 557 | alloc_locked = 1; | ||
| 558 | |||
| 559 | /* Filling a hole: the allocation must not be larger than | ||
| 560 | * the hole itself. */ | ||
| 561 | clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
| 562 | if (clusters_to_alloc > contig_blocks) | ||
| 563 | clusters_to_alloc = contig_blocks; | ||
| 564 | |||
| 565 | /* allocate extent and insert them into the extent tree */ | ||
| 566 | ret = ocfs2_extend_allocation(inode, cpos, | ||
| 567 | clusters_to_alloc, 0); | ||
| 568 | if (ret < 0) { | ||
| 569 | mlog_errno(ret); | ||
| 570 | goto bail; | ||
| 571 | } | ||
| 572 | |||
| 573 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | ||
| 574 | &contig_blocks, &ext_flags); | ||
| 575 | if (ret < 0) { | ||
| 576 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", | ||
| 577 | (unsigned long long)iblock); | ||
| 578 | ret = -EIO; | ||
| 579 | goto bail; | ||
| 580 | } | ||
| 581 | } | ||
| 582 | |||
| 542 | /* | 583 | /* |
| 543 | * get_more_blocks() expects us to describe a hole by clearing | 584 | * get_more_blocks() expects us to describe a hole by clearing |
| 544 | * the mapped bit on bh_result(). | 585 | * the mapped bit on bh_result(). |
| @@ -556,6 +597,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
| 556 | contig_blocks = max_blocks; | 597 | contig_blocks = max_blocks; |
| 557 | bh_result->b_size = contig_blocks << blocksize_bits; | 598 | bh_result->b_size = contig_blocks << blocksize_bits; |
| 558 | bail: | 599 | bail: |
| 600 | if (alloc_locked) | ||
| 601 | ocfs2_inode_unlock(inode, 1); | ||
| 559 | return ret; | 602 | return ret; |
| 560 | } | 603 | } |
| 561 | 604 | ||
| @@ -597,6 +640,184 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
| 597 | return try_to_free_buffers(page); | 640 | return try_to_free_buffers(page); |
| 598 | } | 641 | } |
| 599 | 642 | ||
| 643 | static int ocfs2_is_overwrite(struct ocfs2_super *osb, | ||
| 644 | struct inode *inode, loff_t offset) | ||
| 645 | { | ||
| 646 | int ret = 0; | ||
| 647 | u32 v_cpos = 0; | ||
| 648 | u32 p_cpos = 0; | ||
| 649 | unsigned int num_clusters = 0; | ||
| 650 | unsigned int ext_flags = 0; | ||
| 651 | |||
| 652 | v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
| 653 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
| 654 | &num_clusters, &ext_flags); | ||
| 655 | if (ret < 0) { | ||
| 656 | mlog_errno(ret); | ||
| 657 | return ret; | ||
| 658 | } | ||
| 659 | |||
| 660 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | ||
| 661 | return 1; | ||
| 662 | |||
| 663 | return 0; | ||
| 664 | } | ||
| 665 | |||
| 666 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | ||
| 667 | struct iov_iter *iter, | ||
| 668 | loff_t offset) | ||
| 669 | { | ||
| 670 | ssize_t ret = 0; | ||
| 671 | ssize_t written = 0; | ||
| 672 | bool orphaned = false; | ||
| 673 | int is_overwrite = 0; | ||
| 674 | struct file *file = iocb->ki_filp; | ||
| 675 | struct inode *inode = file_inode(file)->i_mapping->host; | ||
| 676 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 677 | struct buffer_head *di_bh = NULL; | ||
| 678 | size_t count = iter->count; | ||
| 679 | journal_t *journal = osb->journal->j_journal; | ||
| 680 | u32 zero_len; | ||
| 681 | int cluster_align; | ||
| 682 | loff_t final_size = offset + count; | ||
| 683 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | ||
| 684 | unsigned int num_clusters = 0; | ||
| 685 | unsigned int ext_flags = 0; | ||
| 686 | |||
| 687 | { | ||
| 688 | u64 o = offset; | ||
| 689 | |||
| 690 | zero_len = do_div(o, 1 << osb->s_clustersize_bits); | ||
| 691 | cluster_align = !zero_len; | ||
| 692 | } | ||
| 693 | |||
| 694 | /* | ||
| 695 | * when final_size > inode->i_size, inode->i_size will be | ||
| 696 | * updated after direct write, so add the inode to orphan | ||
| 697 | * dir first. | ||
| 698 | */ | ||
| 699 | if (final_size > i_size_read(inode)) { | ||
| 700 | ret = ocfs2_add_inode_to_orphan(osb, inode); | ||
| 701 | if (ret < 0) { | ||
| 702 | mlog_errno(ret); | ||
| 703 | goto out; | ||
| 704 | } | ||
| 705 | orphaned = true; | ||
| 706 | } | ||
| 707 | |||
| 708 | if (append_write) { | ||
| 709 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 710 | if (ret < 0) { | ||
| 711 | mlog_errno(ret); | ||
| 712 | goto clean_orphan; | ||
| 713 | } | ||
| 714 | |||
| 715 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
| 716 | ret = ocfs2_zero_extend(inode, di_bh, offset); | ||
| 717 | else | ||
| 718 | ret = ocfs2_extend_no_holes(inode, di_bh, offset, | ||
| 719 | offset); | ||
| 720 | if (ret < 0) { | ||
| 721 | mlog_errno(ret); | ||
| 722 | ocfs2_inode_unlock(inode, 1); | ||
| 723 | brelse(di_bh); | ||
| 724 | goto clean_orphan; | ||
| 725 | } | ||
| 726 | |||
| 727 | is_overwrite = ocfs2_is_overwrite(osb, inode, offset); | ||
| 728 | if (is_overwrite < 0) { | ||
| 729 | mlog_errno(is_overwrite); | ||
| 730 | ocfs2_inode_unlock(inode, 1); | ||
| 731 | brelse(di_bh); | ||
| 732 | goto clean_orphan; | ||
| 733 | } | ||
| 734 | |||
| 735 | ocfs2_inode_unlock(inode, 1); | ||
| 736 | brelse(di_bh); | ||
| 737 | di_bh = NULL; | ||
| 738 | } | ||
| 739 | |||
| 740 | written = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev, | ||
| 741 | iter, offset, | ||
| 742 | ocfs2_direct_IO_get_blocks, | ||
| 743 | ocfs2_dio_end_io, NULL, 0); | ||
| 744 | if (unlikely(written < 0)) { | ||
| 745 | loff_t i_size = i_size_read(inode); | ||
| 746 | |||
| 747 | if (offset + count > i_size) { | ||
| 748 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 749 | if (ret < 0) { | ||
| 750 | mlog_errno(ret); | ||
| 751 | goto clean_orphan; | ||
| 752 | } | ||
| 753 | |||
| 754 | if (i_size == i_size_read(inode)) { | ||
| 755 | ret = ocfs2_truncate_file(inode, di_bh, | ||
| 756 | i_size); | ||
| 757 | if (ret < 0) { | ||
| 758 | if (ret != -ENOSPC) | ||
| 759 | mlog_errno(ret); | ||
| 760 | |||
| 761 | ocfs2_inode_unlock(inode, 1); | ||
| 762 | brelse(di_bh); | ||
| 763 | goto clean_orphan; | ||
| 764 | } | ||
| 765 | } | ||
| 766 | |||
| 767 | ocfs2_inode_unlock(inode, 1); | ||
| 768 | brelse(di_bh); | ||
| 769 | |||
| 770 | ret = jbd2_journal_force_commit(journal); | ||
| 771 | if (ret < 0) | ||
| 772 | mlog_errno(ret); | ||
| 773 | } | ||
| 774 | } else if (written < 0 && append_write && !is_overwrite && | ||
| 775 | !cluster_align) { | ||
| 776 | u32 p_cpos = 0; | ||
| 777 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
| 778 | |||
| 779 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
| 780 | &num_clusters, &ext_flags); | ||
| 781 | if (ret < 0) { | ||
| 782 | mlog_errno(ret); | ||
| 783 | goto clean_orphan; | ||
| 784 | } | ||
| 785 | |||
| 786 | BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); | ||
| 787 | |||
| 788 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
| 789 | p_cpos << (osb->s_clustersize_bits - 9), | ||
| 790 | zero_len >> 9, GFP_KERNEL, false); | ||
| 791 | if (ret < 0) | ||
| 792 | mlog_errno(ret); | ||
| 793 | } | ||
| 794 | |||
| 795 | clean_orphan: | ||
| 796 | if (orphaned) { | ||
| 797 | int tmp_ret; | ||
| 798 | int update_isize = written > 0 ? 1 : 0; | ||
| 799 | loff_t end = update_isize ? offset + written : 0; | ||
| 800 | |||
| 801 | tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, | ||
| 802 | update_isize, end); | ||
| 803 | if (tmp_ret < 0) { | ||
| 804 | ret = tmp_ret; | ||
| 805 | goto out; | ||
| 806 | } | ||
| 807 | |||
| 808 | tmp_ret = jbd2_journal_force_commit(journal); | ||
| 809 | if (tmp_ret < 0) { | ||
| 810 | ret = tmp_ret; | ||
| 811 | mlog_errno(tmp_ret); | ||
| 812 | } | ||
| 813 | } | ||
| 814 | |||
| 815 | out: | ||
| 816 | if (ret >= 0) | ||
| 817 | ret = written; | ||
| 818 | return ret; | ||
| 819 | } | ||
| 820 | |||
| 600 | static ssize_t ocfs2_direct_IO(int rw, | 821 | static ssize_t ocfs2_direct_IO(int rw, |
| 601 | struct kiocb *iocb, | 822 | struct kiocb *iocb, |
| 602 | struct iov_iter *iter, | 823 | struct iov_iter *iter, |
| @@ -604,6 +825,9 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 604 | { | 825 | { |
| 605 | struct file *file = iocb->ki_filp; | 826 | struct file *file = iocb->ki_filp; |
| 606 | struct inode *inode = file_inode(file)->i_mapping->host; | 827 | struct inode *inode = file_inode(file)->i_mapping->host; |
| 828 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 829 | int full_coherency = !(osb->s_mount_opt & | ||
| 830 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
| 607 | 831 | ||
| 608 | /* | 832 | /* |
| 609 | * Fallback to buffered I/O if we see an inode without | 833 | * Fallback to buffered I/O if we see an inode without |
| @@ -612,14 +836,20 @@ static ssize_t ocfs2_direct_IO(int rw, | |||
| 612 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 836 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
| 613 | return 0; | 837 | return 0; |
| 614 | 838 | ||
| 615 | /* Fallback to buffered I/O if we are appending. */ | 839 | /* Fallback to buffered I/O if we are appending and |
| 616 | if (i_size_read(inode) <= offset) | 840 | * concurrent O_DIRECT writes are allowed. |
| 841 | */ | ||
| 842 | if (i_size_read(inode) <= offset && !full_coherency) | ||
| 617 | return 0; | 843 | return 0; |
| 618 | 844 | ||
| 619 | return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, | 845 | if (rw == READ) |
| 846 | return __blockdev_direct_IO(rw, iocb, inode, | ||
| 847 | inode->i_sb->s_bdev, | ||
| 620 | iter, offset, | 848 | iter, offset, |
| 621 | ocfs2_direct_IO_get_blocks, | 849 | ocfs2_direct_IO_get_blocks, |
| 622 | ocfs2_dio_end_io, NULL, 0); | 850 | ocfs2_dio_end_io, NULL, 0); |
| 851 | else | ||
| 852 | return ocfs2_direct_IO_write(iocb, iter, offset); | ||
| 623 | } | 853 | } |
| 624 | 854 | ||
| 625 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, | 855 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, |
