diff options
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r-- | fs/ocfs2/aops.c | 1141 |
1 files changed, 561 insertions, 580 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index cda0361e95a4..1581240a7ca0 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -499,153 +499,6 @@ bail: | |||
499 | return status; | 499 | return status; |
500 | } | 500 | } |
501 | 501 | ||
502 | /* | ||
503 | * TODO: Make this into a generic get_blocks function. | ||
504 | * | ||
505 | * From do_direct_io in direct-io.c: | ||
506 | * "So what we do is to permit the ->get_blocks function to populate | ||
507 | * bh.b_size with the size of IO which is permitted at this offset and | ||
508 | * this i_blkbits." | ||
509 | * | ||
510 | * This function is called directly from get_more_blocks in direct-io.c. | ||
511 | * | ||
512 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | ||
513 | * fs_count, map_bh, dio->rw == WRITE); | ||
514 | */ | ||
515 | static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | ||
516 | struct buffer_head *bh_result, int create) | ||
517 | { | ||
518 | int ret; | ||
519 | u32 cpos = 0; | ||
520 | int alloc_locked = 0; | ||
521 | u64 p_blkno, inode_blocks, contig_blocks; | ||
522 | unsigned int ext_flags; | ||
523 | unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits; | ||
524 | unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits; | ||
525 | unsigned long len = bh_result->b_size; | ||
526 | unsigned int clusters_to_alloc = 0, contig_clusters = 0; | ||
527 | |||
528 | cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock); | ||
529 | |||
530 | /* This function won't even be called if the request isn't all | ||
531 | * nicely aligned and of the right size, so there's no need | ||
532 | * for us to check any of that. */ | ||
533 | |||
534 | inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); | ||
535 | |||
536 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
537 | |||
538 | /* This figures out the size of the next contiguous block, and | ||
539 | * our logical offset */ | ||
540 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | ||
541 | &contig_blocks, &ext_flags); | ||
542 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
543 | |||
544 | if (ret) { | ||
545 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", | ||
546 | (unsigned long long)iblock); | ||
547 | ret = -EIO; | ||
548 | goto bail; | ||
549 | } | ||
550 | |||
551 | /* We should already CoW the refcounted extent in case of create. */ | ||
552 | BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED)); | ||
553 | |||
554 | /* allocate blocks if no p_blkno is found, and create == 1 */ | ||
555 | if (!p_blkno && create) { | ||
556 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
557 | if (ret < 0) { | ||
558 | mlog_errno(ret); | ||
559 | goto bail; | ||
560 | } | ||
561 | |||
562 | alloc_locked = 1; | ||
563 | |||
564 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
565 | |||
566 | /* fill hole, allocate blocks can't be larger than the size | ||
567 | * of the hole */ | ||
568 | clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
569 | contig_clusters = ocfs2_clusters_for_blocks(inode->i_sb, | ||
570 | contig_blocks); | ||
571 | if (clusters_to_alloc > contig_clusters) | ||
572 | clusters_to_alloc = contig_clusters; | ||
573 | |||
574 | /* allocate extent and insert them into the extent tree */ | ||
575 | ret = ocfs2_extend_allocation(inode, cpos, | ||
576 | clusters_to_alloc, 0); | ||
577 | if (ret < 0) { | ||
578 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
579 | mlog_errno(ret); | ||
580 | goto bail; | ||
581 | } | ||
582 | |||
583 | ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, | ||
584 | &contig_blocks, &ext_flags); | ||
585 | if (ret < 0) { | ||
586 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
587 | mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n", | ||
588 | (unsigned long long)iblock); | ||
589 | ret = -EIO; | ||
590 | goto bail; | ||
591 | } | ||
592 | set_buffer_new(bh_result); | ||
593 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
594 | } | ||
595 | |||
596 | /* | ||
597 | * get_more_blocks() expects us to describe a hole by clearing | ||
598 | * the mapped bit on bh_result(). | ||
599 | * | ||
600 | * Consider an unwritten extent as a hole. | ||
601 | */ | ||
602 | if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | ||
603 | map_bh(bh_result, inode->i_sb, p_blkno); | ||
604 | else | ||
605 | clear_buffer_mapped(bh_result); | ||
606 | |||
607 | /* make sure we don't map more than max_blocks blocks here as | ||
608 | that's all the kernel will handle at this point. */ | ||
609 | if (max_blocks < contig_blocks) | ||
610 | contig_blocks = max_blocks; | ||
611 | bh_result->b_size = contig_blocks << blocksize_bits; | ||
612 | bail: | ||
613 | if (alloc_locked) | ||
614 | ocfs2_inode_unlock(inode, 1); | ||
615 | return ret; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | ||
620 | * particularly interested in the aio/dio case. We use the rw_lock DLM lock | ||
621 | * to protect io on one node from truncation on another. | ||
622 | */ | ||
623 | static void ocfs2_dio_end_io(struct kiocb *iocb, | ||
624 | loff_t offset, | ||
625 | ssize_t bytes, | ||
626 | void *private) | ||
627 | { | ||
628 | struct inode *inode = file_inode(iocb->ki_filp); | ||
629 | int level; | ||
630 | |||
631 | /* this io's submitter should not have unlocked this before we could */ | ||
632 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | ||
633 | |||
634 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { | ||
635 | ocfs2_iocb_clear_unaligned_aio(iocb); | ||
636 | |||
637 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); | ||
638 | } | ||
639 | |||
640 | /* Let rw unlock to be done later to protect append direct io write */ | ||
641 | if (offset + bytes <= i_size_read(inode)) { | ||
642 | ocfs2_iocb_clear_rw_locked(iocb); | ||
643 | |||
644 | level = ocfs2_iocb_rw_locked_level(iocb); | ||
645 | ocfs2_rw_unlock(inode, level); | ||
646 | } | ||
647 | } | ||
648 | |||
649 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 502 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
650 | { | 503 | { |
651 | if (!page_has_buffers(page)) | 504 | if (!page_has_buffers(page)) |
@@ -653,363 +506,6 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
653 | return try_to_free_buffers(page); | 506 | return try_to_free_buffers(page); |
654 | } | 507 | } |
655 | 508 | ||
656 | static int ocfs2_is_overwrite(struct ocfs2_super *osb, | ||
657 | struct inode *inode, loff_t offset) | ||
658 | { | ||
659 | int ret = 0; | ||
660 | u32 v_cpos = 0; | ||
661 | u32 p_cpos = 0; | ||
662 | unsigned int num_clusters = 0; | ||
663 | unsigned int ext_flags = 0; | ||
664 | |||
665 | v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
666 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
667 | &num_clusters, &ext_flags); | ||
668 | if (ret < 0) { | ||
669 | mlog_errno(ret); | ||
670 | return ret; | ||
671 | } | ||
672 | |||
673 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) | ||
674 | return 1; | ||
675 | |||
676 | return 0; | ||
677 | } | ||
678 | |||
679 | static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb, | ||
680 | struct inode *inode, loff_t offset, | ||
681 | u64 zero_len, int cluster_align) | ||
682 | { | ||
683 | u32 p_cpos = 0; | ||
684 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
685 | unsigned int num_clusters = 0; | ||
686 | unsigned int ext_flags = 0; | ||
687 | int ret = 0; | ||
688 | |||
689 | if (offset <= i_size_read(inode) || cluster_align) | ||
690 | return 0; | ||
691 | |||
692 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
693 | &ext_flags); | ||
694 | if (ret < 0) { | ||
695 | mlog_errno(ret); | ||
696 | return ret; | ||
697 | } | ||
698 | |||
699 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
700 | u64 s = i_size_read(inode); | ||
701 | sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) + | ||
702 | (do_div(s, osb->s_clustersize) >> 9); | ||
703 | |||
704 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector, | ||
705 | zero_len >> 9, GFP_NOFS, false); | ||
706 | if (ret < 0) | ||
707 | mlog_errno(ret); | ||
708 | } | ||
709 | |||
710 | return ret; | ||
711 | } | ||
712 | |||
713 | static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb, | ||
714 | struct inode *inode, loff_t offset) | ||
715 | { | ||
716 | u64 zero_start, zero_len, total_zero_len; | ||
717 | u32 p_cpos = 0, clusters_to_add; | ||
718 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode)); | ||
719 | unsigned int num_clusters = 0; | ||
720 | unsigned int ext_flags = 0; | ||
721 | u32 size_div, offset_div; | ||
722 | int ret = 0; | ||
723 | |||
724 | { | ||
725 | u64 o = offset; | ||
726 | u64 s = i_size_read(inode); | ||
727 | |||
728 | offset_div = do_div(o, osb->s_clustersize); | ||
729 | size_div = do_div(s, osb->s_clustersize); | ||
730 | } | ||
731 | |||
732 | if (offset <= i_size_read(inode)) | ||
733 | return 0; | ||
734 | |||
735 | clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) - | ||
736 | ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode)); | ||
737 | total_zero_len = offset - i_size_read(inode); | ||
738 | if (clusters_to_add) | ||
739 | total_zero_len -= offset_div; | ||
740 | |||
741 | /* Allocate clusters to fill out holes, and this is only needed | ||
742 | * when we add more than one cluster. Otherwise the cluster will | ||
743 | * be allocated during direct IO */ | ||
744 | if (clusters_to_add > 1) { | ||
745 | ret = ocfs2_extend_allocation(inode, | ||
746 | OCFS2_I(inode)->ip_clusters, | ||
747 | clusters_to_add - 1, 0); | ||
748 | if (ret) { | ||
749 | mlog_errno(ret); | ||
750 | goto out; | ||
751 | } | ||
752 | } | ||
753 | |||
754 | while (total_zero_len) { | ||
755 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters, | ||
756 | &ext_flags); | ||
757 | if (ret < 0) { | ||
758 | mlog_errno(ret); | ||
759 | goto out; | ||
760 | } | ||
761 | |||
762 | zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) + | ||
763 | size_div; | ||
764 | zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) - | ||
765 | size_div; | ||
766 | zero_len = min(total_zero_len, zero_len); | ||
767 | |||
768 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
769 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
770 | zero_start >> 9, zero_len >> 9, | ||
771 | GFP_NOFS, false); | ||
772 | if (ret < 0) { | ||
773 | mlog_errno(ret); | ||
774 | goto out; | ||
775 | } | ||
776 | } | ||
777 | |||
778 | total_zero_len -= zero_len; | ||
779 | v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div); | ||
780 | |||
781 | /* Only the first iteration can be cluster-unaligned, | ||
782 | * so set size_div to 0 for the rest. */ | ||
783 | size_div = 0; | ||
784 | } | ||
785 | |||
786 | out: | ||
787 | return ret; | ||
788 | } | ||
789 | |||
790 | static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb, | ||
791 | struct iov_iter *iter, | ||
792 | loff_t offset) | ||
793 | { | ||
794 | ssize_t ret = 0; | ||
795 | ssize_t written = 0; | ||
796 | bool orphaned = false; | ||
797 | int is_overwrite = 0; | ||
798 | struct file *file = iocb->ki_filp; | ||
799 | struct inode *inode = file_inode(file)->i_mapping->host; | ||
800 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
801 | struct buffer_head *di_bh = NULL; | ||
802 | size_t count = iter->count; | ||
803 | journal_t *journal = osb->journal->j_journal; | ||
804 | u64 zero_len_head, zero_len_tail; | ||
805 | int cluster_align_head, cluster_align_tail; | ||
806 | loff_t final_size = offset + count; | ||
807 | int append_write = offset >= i_size_read(inode) ? 1 : 0; | ||
808 | unsigned int num_clusters = 0; | ||
809 | unsigned int ext_flags = 0; | ||
810 | |||
811 | { | ||
812 | u64 o = offset; | ||
813 | u64 s = i_size_read(inode); | ||
814 | |||
815 | zero_len_head = do_div(o, 1 << osb->s_clustersize_bits); | ||
816 | cluster_align_head = !zero_len_head; | ||
817 | |||
818 | zero_len_tail = osb->s_clustersize - | ||
819 | do_div(s, osb->s_clustersize); | ||
820 | if ((offset - i_size_read(inode)) < zero_len_tail) | ||
821 | zero_len_tail = offset - i_size_read(inode); | ||
822 | cluster_align_tail = !zero_len_tail; | ||
823 | } | ||
824 | |||
825 | /* | ||
826 | * when final_size > inode->i_size, inode->i_size will be | ||
827 | * updated after direct write, so add the inode to orphan | ||
828 | * dir first. | ||
829 | */ | ||
830 | if (final_size > i_size_read(inode)) { | ||
831 | ret = ocfs2_add_inode_to_orphan(osb, inode); | ||
832 | if (ret < 0) { | ||
833 | mlog_errno(ret); | ||
834 | goto out; | ||
835 | } | ||
836 | orphaned = true; | ||
837 | } | ||
838 | |||
839 | if (append_write) { | ||
840 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
841 | if (ret < 0) { | ||
842 | mlog_errno(ret); | ||
843 | goto clean_orphan; | ||
844 | } | ||
845 | |||
846 | /* zero out the previously allocated cluster tail | ||
847 | * that was allocated but not yet zeroed */ | ||
848 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { | ||
849 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
850 | ret = ocfs2_direct_IO_zero_extend(osb, inode, offset, | ||
851 | zero_len_tail, cluster_align_tail); | ||
852 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
853 | } else { | ||
854 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
855 | ret = ocfs2_direct_IO_extend_no_holes(osb, inode, | ||
856 | offset); | ||
857 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
858 | } | ||
859 | if (ret < 0) { | ||
860 | mlog_errno(ret); | ||
861 | ocfs2_inode_unlock(inode, 1); | ||
862 | goto clean_orphan; | ||
863 | } | ||
864 | |||
865 | is_overwrite = ocfs2_is_overwrite(osb, inode, offset); | ||
866 | if (is_overwrite < 0) { | ||
867 | mlog_errno(is_overwrite); | ||
868 | ret = is_overwrite; | ||
869 | ocfs2_inode_unlock(inode, 1); | ||
870 | goto clean_orphan; | ||
871 | } | ||
872 | |||
873 | ocfs2_inode_unlock(inode, 1); | ||
874 | } | ||
875 | |||
876 | written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, | ||
877 | offset, ocfs2_direct_IO_get_blocks, | ||
878 | ocfs2_dio_end_io, NULL, 0); | ||
879 | /* overwrite aio may return -EIOCBQUEUED, and it is not an error */ | ||
880 | if ((written < 0) && (written != -EIOCBQUEUED)) { | ||
881 | loff_t i_size = i_size_read(inode); | ||
882 | |||
883 | if (offset + count > i_size) { | ||
884 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
885 | if (ret < 0) { | ||
886 | mlog_errno(ret); | ||
887 | goto clean_orphan; | ||
888 | } | ||
889 | |||
890 | if (i_size == i_size_read(inode)) { | ||
891 | ret = ocfs2_truncate_file(inode, di_bh, | ||
892 | i_size); | ||
893 | if (ret < 0) { | ||
894 | if (ret != -ENOSPC) | ||
895 | mlog_errno(ret); | ||
896 | |||
897 | ocfs2_inode_unlock(inode, 1); | ||
898 | brelse(di_bh); | ||
899 | di_bh = NULL; | ||
900 | goto clean_orphan; | ||
901 | } | ||
902 | } | ||
903 | |||
904 | ocfs2_inode_unlock(inode, 1); | ||
905 | brelse(di_bh); | ||
906 | di_bh = NULL; | ||
907 | |||
908 | ret = jbd2_journal_force_commit(journal); | ||
909 | if (ret < 0) | ||
910 | mlog_errno(ret); | ||
911 | } | ||
912 | } else if (written > 0 && append_write && !is_overwrite && | ||
913 | !cluster_align_head) { | ||
914 | /* zeroing out the allocated cluster head */ | ||
915 | u32 p_cpos = 0; | ||
916 | u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset); | ||
917 | |||
918 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
919 | if (ret < 0) { | ||
920 | mlog_errno(ret); | ||
921 | goto clean_orphan; | ||
922 | } | ||
923 | |||
924 | ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, | ||
925 | &num_clusters, &ext_flags); | ||
926 | if (ret < 0) { | ||
927 | mlog_errno(ret); | ||
928 | ocfs2_inode_unlock(inode, 0); | ||
929 | goto clean_orphan; | ||
930 | } | ||
931 | |||
932 | BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)); | ||
933 | |||
934 | ret = blkdev_issue_zeroout(osb->sb->s_bdev, | ||
935 | (u64)p_cpos << (osb->s_clustersize_bits - 9), | ||
936 | zero_len_head >> 9, GFP_NOFS, false); | ||
937 | if (ret < 0) | ||
938 | mlog_errno(ret); | ||
939 | |||
940 | ocfs2_inode_unlock(inode, 0); | ||
941 | } | ||
942 | |||
943 | clean_orphan: | ||
944 | if (orphaned) { | ||
945 | int tmp_ret; | ||
946 | int update_isize = written > 0 ? 1 : 0; | ||
947 | loff_t end = update_isize ? offset + written : 0; | ||
948 | |||
949 | tmp_ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
950 | if (tmp_ret < 0) { | ||
951 | ret = tmp_ret; | ||
952 | mlog_errno(ret); | ||
953 | goto out; | ||
954 | } | ||
955 | |||
956 | tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, | ||
957 | update_isize, end); | ||
958 | if (tmp_ret < 0) { | ||
959 | ocfs2_inode_unlock(inode, 1); | ||
960 | ret = tmp_ret; | ||
961 | mlog_errno(ret); | ||
962 | brelse(di_bh); | ||
963 | goto out; | ||
964 | } | ||
965 | |||
966 | ocfs2_inode_unlock(inode, 1); | ||
967 | brelse(di_bh); | ||
968 | |||
969 | tmp_ret = jbd2_journal_force_commit(journal); | ||
970 | if (tmp_ret < 0) { | ||
971 | ret = tmp_ret; | ||
972 | mlog_errno(tmp_ret); | ||
973 | } | ||
974 | } | ||
975 | |||
976 | out: | ||
977 | if (ret >= 0) | ||
978 | ret = written; | ||
979 | return ret; | ||
980 | } | ||
981 | |||
982 | static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | ||
983 | loff_t offset) | ||
984 | { | ||
985 | struct file *file = iocb->ki_filp; | ||
986 | struct inode *inode = file_inode(file)->i_mapping->host; | ||
987 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
988 | int full_coherency = !(osb->s_mount_opt & | ||
989 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
990 | |||
991 | /* | ||
992 | * Fallback to buffered I/O if we see an inode without | ||
993 | * extents. | ||
994 | */ | ||
995 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
996 | return 0; | ||
997 | |||
998 | /* Fallback to buffered I/O if we are appending and | ||
999 | * concurrent O_DIRECT writes are allowed. | ||
1000 | */ | ||
1001 | if (i_size_read(inode) <= offset && !full_coherency) | ||
1002 | return 0; | ||
1003 | |||
1004 | if (iov_iter_rw(iter) == READ) | ||
1005 | return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | ||
1006 | iter, offset, | ||
1007 | ocfs2_direct_IO_get_blocks, | ||
1008 | ocfs2_dio_end_io, NULL, 0); | ||
1009 | else | ||
1010 | return ocfs2_direct_IO_write(iocb, iter, offset); | ||
1011 | } | ||
1012 | |||
1013 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, | 509 | static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, |
1014 | u32 cpos, | 510 | u32 cpos, |
1015 | unsigned int *start, | 511 | unsigned int *start, |
@@ -1196,6 +692,13 @@ next_bh: | |||
1196 | 692 | ||
1197 | #define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) | 693 | #define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) |
1198 | 694 | ||
695 | struct ocfs2_unwritten_extent { | ||
696 | struct list_head ue_node; | ||
697 | struct list_head ue_ip_node; | ||
698 | u32 ue_cpos; | ||
699 | u32 ue_phys; | ||
700 | }; | ||
701 | |||
1199 | /* | 702 | /* |
1200 | * Describe the state of a single cluster to be written to. | 703 | * Describe the state of a single cluster to be written to. |
1201 | */ | 704 | */ |
@@ -1207,7 +710,7 @@ struct ocfs2_write_cluster_desc { | |||
1207 | * filled. | 710 | * filled. |
1208 | */ | 711 | */ |
1209 | unsigned c_new; | 712 | unsigned c_new; |
1210 | unsigned c_unwritten; | 713 | unsigned c_clear_unwritten; |
1211 | unsigned c_needs_zero; | 714 | unsigned c_needs_zero; |
1212 | }; | 715 | }; |
1213 | 716 | ||
@@ -1219,6 +722,9 @@ struct ocfs2_write_ctxt { | |||
1219 | /* First cluster allocated in a nonsparse extend */ | 722 | /* First cluster allocated in a nonsparse extend */ |
1220 | u32 w_first_new_cpos; | 723 | u32 w_first_new_cpos; |
1221 | 724 | ||
725 | /* Type of caller. Must be one of buffer, mmap, direct. */ | ||
726 | ocfs2_write_type_t w_type; | ||
727 | |||
1222 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; | 728 | struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; |
1223 | 729 | ||
1224 | /* | 730 | /* |
@@ -1267,6 +773,8 @@ struct ocfs2_write_ctxt { | |||
1267 | struct buffer_head *w_di_bh; | 773 | struct buffer_head *w_di_bh; |
1268 | 774 | ||
1269 | struct ocfs2_cached_dealloc_ctxt w_dealloc; | 775 | struct ocfs2_cached_dealloc_ctxt w_dealloc; |
776 | |||
777 | struct list_head w_unwritten_list; | ||
1270 | }; | 778 | }; |
1271 | 779 | ||
1272 | void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) | 780 | void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) |
@@ -1305,8 +813,25 @@ static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) | |||
1305 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); | 813 | ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); |
1306 | } | 814 | } |
1307 | 815 | ||
1308 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | 816 | static void ocfs2_free_unwritten_list(struct inode *inode, |
817 | struct list_head *head) | ||
1309 | { | 818 | { |
819 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
820 | struct ocfs2_unwritten_extent *ue = NULL, *tmp = NULL; | ||
821 | |||
822 | list_for_each_entry_safe(ue, tmp, head, ue_node) { | ||
823 | list_del(&ue->ue_node); | ||
824 | spin_lock(&oi->ip_lock); | ||
825 | list_del(&ue->ue_ip_node); | ||
826 | spin_unlock(&oi->ip_lock); | ||
827 | kfree(ue); | ||
828 | } | ||
829 | } | ||
830 | |||
831 | static void ocfs2_free_write_ctxt(struct inode *inode, | ||
832 | struct ocfs2_write_ctxt *wc) | ||
833 | { | ||
834 | ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list); | ||
1310 | ocfs2_unlock_pages(wc); | 835 | ocfs2_unlock_pages(wc); |
1311 | brelse(wc->w_di_bh); | 836 | brelse(wc->w_di_bh); |
1312 | kfree(wc); | 837 | kfree(wc); |
@@ -1314,7 +839,8 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | |||
1314 | 839 | ||
1315 | static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, | 840 | static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, |
1316 | struct ocfs2_super *osb, loff_t pos, | 841 | struct ocfs2_super *osb, loff_t pos, |
1317 | unsigned len, struct buffer_head *di_bh) | 842 | unsigned len, ocfs2_write_type_t type, |
843 | struct buffer_head *di_bh) | ||
1318 | { | 844 | { |
1319 | u32 cend; | 845 | u32 cend; |
1320 | struct ocfs2_write_ctxt *wc; | 846 | struct ocfs2_write_ctxt *wc; |
@@ -1329,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, | |||
1329 | wc->w_clen = cend - wc->w_cpos + 1; | 855 | wc->w_clen = cend - wc->w_cpos + 1; |
1330 | get_bh(di_bh); | 856 | get_bh(di_bh); |
1331 | wc->w_di_bh = di_bh; | 857 | wc->w_di_bh = di_bh; |
858 | wc->w_type = type; | ||
1332 | 859 | ||
1333 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) | 860 | if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) |
1334 | wc->w_large_pages = 1; | 861 | wc->w_large_pages = 1; |
@@ -1336,6 +863,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, | |||
1336 | wc->w_large_pages = 0; | 863 | wc->w_large_pages = 0; |
1337 | 864 | ||
1338 | ocfs2_init_dealloc_ctxt(&wc->w_dealloc); | 865 | ocfs2_init_dealloc_ctxt(&wc->w_dealloc); |
866 | INIT_LIST_HEAD(&wc->w_unwritten_list); | ||
1339 | 867 | ||
1340 | *wcp = wc; | 868 | *wcp = wc; |
1341 | 869 | ||
@@ -1396,12 +924,13 @@ static void ocfs2_write_failure(struct inode *inode, | |||
1396 | to = user_pos + user_len; | 924 | to = user_pos + user_len; |
1397 | struct page *tmppage; | 925 | struct page *tmppage; |
1398 | 926 | ||
1399 | ocfs2_zero_new_buffers(wc->w_target_page, from, to); | 927 | if (wc->w_target_page) |
928 | ocfs2_zero_new_buffers(wc->w_target_page, from, to); | ||
1400 | 929 | ||
1401 | for(i = 0; i < wc->w_num_pages; i++) { | 930 | for(i = 0; i < wc->w_num_pages; i++) { |
1402 | tmppage = wc->w_pages[i]; | 931 | tmppage = wc->w_pages[i]; |
1403 | 932 | ||
1404 | if (page_has_buffers(tmppage)) { | 933 | if (tmppage && page_has_buffers(tmppage)) { |
1405 | if (ocfs2_should_order_data(inode)) | 934 | if (ocfs2_should_order_data(inode)) |
1406 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | 935 | ocfs2_jbd2_file_inode(wc->w_handle, inode); |
1407 | 936 | ||
@@ -1531,11 +1060,13 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
1531 | wc->w_num_pages = 1; | 1060 | wc->w_num_pages = 1; |
1532 | start = target_index; | 1061 | start = target_index; |
1533 | } | 1062 | } |
1063 | end_index = (user_pos + user_len - 1) >> PAGE_CACHE_SHIFT; | ||
1534 | 1064 | ||
1535 | for(i = 0; i < wc->w_num_pages; i++) { | 1065 | for(i = 0; i < wc->w_num_pages; i++) { |
1536 | index = start + i; | 1066 | index = start + i; |
1537 | 1067 | ||
1538 | if (index == target_index && mmap_page) { | 1068 | if (index >= target_index && index <= end_index && |
1069 | wc->w_type == OCFS2_WRITE_MMAP) { | ||
1539 | /* | 1070 | /* |
1540 | * ocfs2_pagemkwrite() is a little different | 1071 | * ocfs2_pagemkwrite() is a little different |
1541 | * and wants us to directly use the page | 1072 | * and wants us to directly use the page |
@@ -1554,6 +1085,11 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, | |||
1554 | page_cache_get(mmap_page); | 1085 | page_cache_get(mmap_page); |
1555 | wc->w_pages[i] = mmap_page; | 1086 | wc->w_pages[i] = mmap_page; |
1556 | wc->w_target_locked = true; | 1087 | wc->w_target_locked = true; |
1088 | } else if (index >= target_index && index <= end_index && | ||
1089 | wc->w_type == OCFS2_WRITE_DIRECT) { | ||
1090 | /* Direct write has no mapping page. */ | ||
1091 | wc->w_pages[i] = NULL; | ||
1092 | continue; | ||
1557 | } else { | 1093 | } else { |
1558 | wc->w_pages[i] = find_or_create_page(mapping, index, | 1094 | wc->w_pages[i] = find_or_create_page(mapping, index, |
1559 | GFP_NOFS); | 1095 | GFP_NOFS); |
@@ -1578,19 +1114,20 @@ out: | |||
1578 | * Prepare a single cluster for write one cluster into the file. | 1114 | * Prepare a single cluster for write one cluster into the file. |
1579 | */ | 1115 | */ |
1580 | static int ocfs2_write_cluster(struct address_space *mapping, | 1116 | static int ocfs2_write_cluster(struct address_space *mapping, |
1581 | u32 phys, unsigned int unwritten, | 1117 | u32 *phys, unsigned int new, |
1118 | unsigned int clear_unwritten, | ||
1582 | unsigned int should_zero, | 1119 | unsigned int should_zero, |
1583 | struct ocfs2_alloc_context *data_ac, | 1120 | struct ocfs2_alloc_context *data_ac, |
1584 | struct ocfs2_alloc_context *meta_ac, | 1121 | struct ocfs2_alloc_context *meta_ac, |
1585 | struct ocfs2_write_ctxt *wc, u32 cpos, | 1122 | struct ocfs2_write_ctxt *wc, u32 cpos, |
1586 | loff_t user_pos, unsigned user_len) | 1123 | loff_t user_pos, unsigned user_len) |
1587 | { | 1124 | { |
1588 | int ret, i, new; | 1125 | int ret, i; |
1589 | u64 v_blkno, p_blkno; | 1126 | u64 p_blkno; |
1590 | struct inode *inode = mapping->host; | 1127 | struct inode *inode = mapping->host; |
1591 | struct ocfs2_extent_tree et; | 1128 | struct ocfs2_extent_tree et; |
1129 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
1592 | 1130 | ||
1593 | new = phys == 0 ? 1 : 0; | ||
1594 | if (new) { | 1131 | if (new) { |
1595 | u32 tmp_pos; | 1132 | u32 tmp_pos; |
1596 | 1133 | ||
@@ -1600,9 +1137,9 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1600 | */ | 1137 | */ |
1601 | tmp_pos = cpos; | 1138 | tmp_pos = cpos; |
1602 | ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, | 1139 | ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, |
1603 | &tmp_pos, 1, 0, wc->w_di_bh, | 1140 | &tmp_pos, 1, !clear_unwritten, |
1604 | wc->w_handle, data_ac, | 1141 | wc->w_di_bh, wc->w_handle, |
1605 | meta_ac, NULL); | 1142 | data_ac, meta_ac, NULL); |
1606 | /* | 1143 | /* |
1607 | * This shouldn't happen because we must have already | 1144 | * This shouldn't happen because we must have already |
1608 | * calculated the correct meta data allocation required. The | 1145 | * calculated the correct meta data allocation required. The |
@@ -1619,11 +1156,11 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1619 | mlog_errno(ret); | 1156 | mlog_errno(ret); |
1620 | goto out; | 1157 | goto out; |
1621 | } | 1158 | } |
1622 | } else if (unwritten) { | 1159 | } else if (clear_unwritten) { |
1623 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), | 1160 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), |
1624 | wc->w_di_bh); | 1161 | wc->w_di_bh); |
1625 | ret = ocfs2_mark_extent_written(inode, &et, | 1162 | ret = ocfs2_mark_extent_written(inode, &et, |
1626 | wc->w_handle, cpos, 1, phys, | 1163 | wc->w_handle, cpos, 1, *phys, |
1627 | meta_ac, &wc->w_dealloc); | 1164 | meta_ac, &wc->w_dealloc); |
1628 | if (ret < 0) { | 1165 | if (ret < 0) { |
1629 | mlog_errno(ret); | 1166 | mlog_errno(ret); |
@@ -1631,30 +1168,33 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1631 | } | 1168 | } |
1632 | } | 1169 | } |
1633 | 1170 | ||
1634 | if (should_zero) | ||
1635 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); | ||
1636 | else | ||
1637 | v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; | ||
1638 | |||
1639 | /* | 1171 | /* |
1640 | * The only reason this should fail is due to an inability to | 1172 | * The only reason this should fail is due to an inability to |
1641 | * find the extent added. | 1173 | * find the extent added. |
1642 | */ | 1174 | */ |
1643 | ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL, | 1175 | ret = ocfs2_get_clusters(inode, cpos, phys, NULL, NULL); |
1644 | NULL); | ||
1645 | if (ret < 0) { | 1176 | if (ret < 0) { |
1646 | mlog(ML_ERROR, "Get physical blkno failed for inode %llu, " | 1177 | mlog(ML_ERROR, "Get physical blkno failed for inode %llu, " |
1647 | "at logical block %llu", | 1178 | "at logical cluster %u", |
1648 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1179 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); |
1649 | (unsigned long long)v_blkno); | ||
1650 | goto out; | 1180 | goto out; |
1651 | } | 1181 | } |
1652 | 1182 | ||
1653 | BUG_ON(p_blkno == 0); | 1183 | BUG_ON(*phys == 0); |
1184 | |||
1185 | p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *phys); | ||
1186 | if (!should_zero) | ||
1187 | p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1); | ||
1654 | 1188 | ||
1655 | for(i = 0; i < wc->w_num_pages; i++) { | 1189 | for(i = 0; i < wc->w_num_pages; i++) { |
1656 | int tmpret; | 1190 | int tmpret; |
1657 | 1191 | ||
1192 | /* This is the direct io target page. */ | ||
1193 | if (wc->w_pages[i] == NULL) { | ||
1194 | p_blkno++; | ||
1195 | continue; | ||
1196 | } | ||
1197 | |||
1658 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, | 1198 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, |
1659 | wc->w_pages[i], cpos, | 1199 | wc->w_pages[i], cpos, |
1660 | user_pos, user_len, | 1200 | user_pos, user_len, |
@@ -1701,8 +1241,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, | |||
1701 | if ((cluster_off + local_len) > osb->s_clustersize) | 1241 | if ((cluster_off + local_len) > osb->s_clustersize) |
1702 | local_len = osb->s_clustersize - cluster_off; | 1242 | local_len = osb->s_clustersize - cluster_off; |
1703 | 1243 | ||
1704 | ret = ocfs2_write_cluster(mapping, desc->c_phys, | 1244 | ret = ocfs2_write_cluster(mapping, &desc->c_phys, |
1705 | desc->c_unwritten, | 1245 | desc->c_new, |
1246 | desc->c_clear_unwritten, | ||
1706 | desc->c_needs_zero, | 1247 | desc->c_needs_zero, |
1707 | data_ac, meta_ac, | 1248 | data_ac, meta_ac, |
1708 | wc, desc->c_cpos, pos, local_len); | 1249 | wc, desc->c_cpos, pos, local_len); |
@@ -1773,6 +1314,66 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | |||
1773 | } | 1314 | } |
1774 | 1315 | ||
1775 | /* | 1316 | /* |
1317 | * Check if this extent is marked UNWRITTEN by direct io. If so, we need not to | ||
1318 | * do the zero work. And should not to clear UNWRITTEN since it will be cleared | ||
1319 | * by the direct io procedure. | ||
1320 | * If this is a new extent that allocated by direct io, we should mark it in | ||
1321 | * the ip_unwritten_list. | ||
1322 | */ | ||
1323 | static int ocfs2_unwritten_check(struct inode *inode, | ||
1324 | struct ocfs2_write_ctxt *wc, | ||
1325 | struct ocfs2_write_cluster_desc *desc) | ||
1326 | { | ||
1327 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1328 | struct ocfs2_unwritten_extent *ue = NULL, *new = NULL; | ||
1329 | int ret = 0; | ||
1330 | |||
1331 | if (!desc->c_needs_zero) | ||
1332 | return 0; | ||
1333 | |||
1334 | retry: | ||
1335 | spin_lock(&oi->ip_lock); | ||
1336 | /* Needs not to zero no metter buffer or direct. The one who is zero | ||
1337 | * the cluster is doing zero. And he will clear unwritten after all | ||
1338 | * cluster io finished. */ | ||
1339 | list_for_each_entry(ue, &oi->ip_unwritten_list, ue_ip_node) { | ||
1340 | if (desc->c_cpos == ue->ue_cpos) { | ||
1341 | BUG_ON(desc->c_new); | ||
1342 | desc->c_needs_zero = 0; | ||
1343 | desc->c_clear_unwritten = 0; | ||
1344 | goto unlock; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | if (wc->w_type != OCFS2_WRITE_DIRECT) | ||
1349 | goto unlock; | ||
1350 | |||
1351 | if (new == NULL) { | ||
1352 | spin_unlock(&oi->ip_lock); | ||
1353 | new = kmalloc(sizeof(struct ocfs2_unwritten_extent), | ||
1354 | GFP_NOFS); | ||
1355 | if (new == NULL) { | ||
1356 | ret = -ENOMEM; | ||
1357 | goto out; | ||
1358 | } | ||
1359 | goto retry; | ||
1360 | } | ||
1361 | /* This direct write will doing zero. */ | ||
1362 | new->ue_cpos = desc->c_cpos; | ||
1363 | new->ue_phys = desc->c_phys; | ||
1364 | desc->c_clear_unwritten = 0; | ||
1365 | list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list); | ||
1366 | list_add_tail(&new->ue_node, &wc->w_unwritten_list); | ||
1367 | new = NULL; | ||
1368 | unlock: | ||
1369 | spin_unlock(&oi->ip_lock); | ||
1370 | out: | ||
1371 | if (new) | ||
1372 | kfree(new); | ||
1373 | return ret; | ||
1374 | } | ||
1375 | |||
1376 | /* | ||
1776 | * Populate each single-cluster write descriptor in the write context | 1377 | * Populate each single-cluster write descriptor in the write context |
1777 | * with information about the i/o to be done. | 1378 | * with information about the i/o to be done. |
1778 | * | 1379 | * |
@@ -1847,14 +1448,21 @@ static int ocfs2_populate_write_desc(struct inode *inode, | |||
1847 | if (phys == 0) { | 1448 | if (phys == 0) { |
1848 | desc->c_new = 1; | 1449 | desc->c_new = 1; |
1849 | desc->c_needs_zero = 1; | 1450 | desc->c_needs_zero = 1; |
1451 | desc->c_clear_unwritten = 1; | ||
1850 | *clusters_to_alloc = *clusters_to_alloc + 1; | 1452 | *clusters_to_alloc = *clusters_to_alloc + 1; |
1851 | } | 1453 | } |
1852 | 1454 | ||
1853 | if (ext_flags & OCFS2_EXT_UNWRITTEN) { | 1455 | if (ext_flags & OCFS2_EXT_UNWRITTEN) { |
1854 | desc->c_unwritten = 1; | 1456 | desc->c_clear_unwritten = 1; |
1855 | desc->c_needs_zero = 1; | 1457 | desc->c_needs_zero = 1; |
1856 | } | 1458 | } |
1857 | 1459 | ||
1460 | ret = ocfs2_unwritten_check(inode, wc, desc); | ||
1461 | if (ret) { | ||
1462 | mlog_errno(ret); | ||
1463 | goto out; | ||
1464 | } | ||
1465 | |||
1858 | num_clusters--; | 1466 | num_clusters--; |
1859 | } | 1467 | } |
1860 | 1468 | ||
@@ -2017,8 +1625,10 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, | |||
2017 | if (ret) | 1625 | if (ret) |
2018 | mlog_errno(ret); | 1626 | mlog_errno(ret); |
2019 | 1627 | ||
2020 | wc->w_first_new_cpos = | 1628 | /* There is no wc if this is call from direct. */ |
2021 | ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); | 1629 | if (wc) |
1630 | wc->w_first_new_cpos = | ||
1631 | ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); | ||
2022 | 1632 | ||
2023 | return ret; | 1633 | return ret; |
2024 | } | 1634 | } |
@@ -2072,9 +1682,8 @@ out: | |||
2072 | return ret; | 1682 | return ret; |
2073 | } | 1683 | } |
2074 | 1684 | ||
2075 | int ocfs2_write_begin_nolock(struct file *filp, | 1685 | int ocfs2_write_begin_nolock(struct address_space *mapping, |
2076 | struct address_space *mapping, | 1686 | loff_t pos, unsigned len, ocfs2_write_type_t type, |
2077 | loff_t pos, unsigned len, unsigned flags, | ||
2078 | struct page **pagep, void **fsdata, | 1687 | struct page **pagep, void **fsdata, |
2079 | struct buffer_head *di_bh, struct page *mmap_page) | 1688 | struct buffer_head *di_bh, struct page *mmap_page) |
2080 | { | 1689 | { |
@@ -2091,7 +1700,7 @@ int ocfs2_write_begin_nolock(struct file *filp, | |||
2091 | int try_free = 1, ret1; | 1700 | int try_free = 1, ret1; |
2092 | 1701 | ||
2093 | try_again: | 1702 | try_again: |
2094 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); | 1703 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, type, di_bh); |
2095 | if (ret) { | 1704 | if (ret) { |
2096 | mlog_errno(ret); | 1705 | mlog_errno(ret); |
2097 | return ret; | 1706 | return ret; |
@@ -2110,14 +1719,17 @@ try_again: | |||
2110 | } | 1719 | } |
2111 | } | 1720 | } |
2112 | 1721 | ||
2113 | if (ocfs2_sparse_alloc(osb)) | 1722 | /* Direct io change i_size late, should not zero tail here. */ |
2114 | ret = ocfs2_zero_tail(inode, di_bh, pos); | 1723 | if (type != OCFS2_WRITE_DIRECT) { |
2115 | else | 1724 | if (ocfs2_sparse_alloc(osb)) |
2116 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len, | 1725 | ret = ocfs2_zero_tail(inode, di_bh, pos); |
2117 | wc); | 1726 | else |
2118 | if (ret) { | 1727 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, |
2119 | mlog_errno(ret); | 1728 | len, wc); |
2120 | goto out; | 1729 | if (ret) { |
1730 | mlog_errno(ret); | ||
1731 | goto out; | ||
1732 | } | ||
2121 | } | 1733 | } |
2122 | 1734 | ||
2123 | ret = ocfs2_check_range_for_refcount(inode, pos, len); | 1735 | ret = ocfs2_check_range_for_refcount(inode, pos, len); |
@@ -2148,7 +1760,7 @@ try_again: | |||
2148 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1760 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
2149 | (long long)i_size_read(inode), | 1761 | (long long)i_size_read(inode), |
2150 | le32_to_cpu(di->i_clusters), | 1762 | le32_to_cpu(di->i_clusters), |
2151 | pos, len, flags, mmap_page, | 1763 | pos, len, type, mmap_page, |
2152 | clusters_to_alloc, extents_to_split); | 1764 | clusters_to_alloc, extents_to_split); |
2153 | 1765 | ||
2154 | /* | 1766 | /* |
@@ -2178,17 +1790,17 @@ try_again: | |||
2178 | 1790 | ||
2179 | credits = ocfs2_calc_extend_credits(inode->i_sb, | 1791 | credits = ocfs2_calc_extend_credits(inode->i_sb, |
2180 | &di->id2.i_list); | 1792 | &di->id2.i_list); |
2181 | 1793 | } else if (type == OCFS2_WRITE_DIRECT) | |
2182 | } | 1794 | /* direct write needs not to start trans if no extents alloc. */ |
1795 | goto success; | ||
2183 | 1796 | ||
2184 | /* | 1797 | /* |
2185 | * We have to zero sparse allocated clusters, unwritten extent clusters, | 1798 | * We have to zero sparse allocated clusters, unwritten extent clusters, |
2186 | * and non-sparse clusters we just extended. For non-sparse writes, | 1799 | * and non-sparse clusters we just extended. For non-sparse writes, |
2187 | * we know zeros will only be needed in the first and/or last cluster. | 1800 | * we know zeros will only be needed in the first and/or last cluster. |
2188 | */ | 1801 | */ |
2189 | if (clusters_to_alloc || extents_to_split || | 1802 | if (wc->w_clen && (wc->w_desc[0].c_needs_zero || |
2190 | (wc->w_clen && (wc->w_desc[0].c_needs_zero || | 1803 | wc->w_desc[wc->w_clen - 1].c_needs_zero)) |
2191 | wc->w_desc[wc->w_clen - 1].c_needs_zero))) | ||
2192 | cluster_of_pages = 1; | 1804 | cluster_of_pages = 1; |
2193 | else | 1805 | else |
2194 | cluster_of_pages = 0; | 1806 | cluster_of_pages = 0; |
@@ -2255,7 +1867,8 @@ try_again: | |||
2255 | ocfs2_free_alloc_context(meta_ac); | 1867 | ocfs2_free_alloc_context(meta_ac); |
2256 | 1868 | ||
2257 | success: | 1869 | success: |
2258 | *pagep = wc->w_target_page; | 1870 | if (pagep) |
1871 | *pagep = wc->w_target_page; | ||
2259 | *fsdata = wc; | 1872 | *fsdata = wc; |
2260 | return 0; | 1873 | return 0; |
2261 | out_quota: | 1874 | out_quota: |
@@ -2266,7 +1879,7 @@ out_commit: | |||
2266 | ocfs2_commit_trans(osb, handle); | 1879 | ocfs2_commit_trans(osb, handle); |
2267 | 1880 | ||
2268 | out: | 1881 | out: |
2269 | ocfs2_free_write_ctxt(wc); | 1882 | ocfs2_free_write_ctxt(inode, wc); |
2270 | 1883 | ||
2271 | if (data_ac) { | 1884 | if (data_ac) { |
2272 | ocfs2_free_alloc_context(data_ac); | 1885 | ocfs2_free_alloc_context(data_ac); |
@@ -2318,8 +1931,8 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
2318 | */ | 1931 | */ |
2319 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1932 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
2320 | 1933 | ||
2321 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, | 1934 | ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER, |
2322 | fsdata, di_bh, NULL); | 1935 | pagep, fsdata, di_bh, NULL); |
2323 | if (ret) { | 1936 | if (ret) { |
2324 | mlog_errno(ret); | 1937 | mlog_errno(ret); |
2325 | goto out_fail; | 1938 | goto out_fail; |
@@ -2376,12 +1989,16 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
2376 | handle_t *handle = wc->w_handle; | 1989 | handle_t *handle = wc->w_handle; |
2377 | struct page *tmppage; | 1990 | struct page *tmppage; |
2378 | 1991 | ||
2379 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, | 1992 | BUG_ON(!list_empty(&wc->w_unwritten_list)); |
2380 | OCFS2_JOURNAL_ACCESS_WRITE); | 1993 | |
2381 | if (ret) { | 1994 | if (handle) { |
2382 | copied = ret; | 1995 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), |
2383 | mlog_errno(ret); | 1996 | wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); |
2384 | goto out; | 1997 | if (ret) { |
1998 | copied = ret; | ||
1999 | mlog_errno(ret); | ||
2000 | goto out; | ||
2001 | } | ||
2385 | } | 2002 | } |
2386 | 2003 | ||
2387 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 2004 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
@@ -2389,18 +2006,23 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
2389 | goto out_write_size; | 2006 | goto out_write_size; |
2390 | } | 2007 | } |
2391 | 2008 | ||
2392 | if (unlikely(copied < len)) { | 2009 | if (unlikely(copied < len) && wc->w_target_page) { |
2393 | if (!PageUptodate(wc->w_target_page)) | 2010 | if (!PageUptodate(wc->w_target_page)) |
2394 | copied = 0; | 2011 | copied = 0; |
2395 | 2012 | ||
2396 | ocfs2_zero_new_buffers(wc->w_target_page, start+copied, | 2013 | ocfs2_zero_new_buffers(wc->w_target_page, start+copied, |
2397 | start+len); | 2014 | start+len); |
2398 | } | 2015 | } |
2399 | flush_dcache_page(wc->w_target_page); | 2016 | if (wc->w_target_page) |
2017 | flush_dcache_page(wc->w_target_page); | ||
2400 | 2018 | ||
2401 | for(i = 0; i < wc->w_num_pages; i++) { | 2019 | for(i = 0; i < wc->w_num_pages; i++) { |
2402 | tmppage = wc->w_pages[i]; | 2020 | tmppage = wc->w_pages[i]; |
2403 | 2021 | ||
2022 | /* This is the direct io target page. */ | ||
2023 | if (tmppage == NULL) | ||
2024 | continue; | ||
2025 | |||
2404 | if (tmppage == wc->w_target_page) { | 2026 | if (tmppage == wc->w_target_page) { |
2405 | from = wc->w_target_from; | 2027 | from = wc->w_target_from; |
2406 | to = wc->w_target_to; | 2028 | to = wc->w_target_to; |
@@ -2419,25 +2041,29 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
2419 | } | 2041 | } |
2420 | 2042 | ||
2421 | if (page_has_buffers(tmppage)) { | 2043 | if (page_has_buffers(tmppage)) { |
2422 | if (ocfs2_should_order_data(inode)) | 2044 | if (handle && ocfs2_should_order_data(inode)) |
2423 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | 2045 | ocfs2_jbd2_file_inode(handle, inode); |
2424 | block_commit_write(tmppage, from, to); | 2046 | block_commit_write(tmppage, from, to); |
2425 | } | 2047 | } |
2426 | } | 2048 | } |
2427 | 2049 | ||
2428 | out_write_size: | 2050 | out_write_size: |
2429 | pos += copied; | 2051 | /* Direct io do not update i_size here. */ |
2430 | if (pos > i_size_read(inode)) { | 2052 | if (wc->w_type != OCFS2_WRITE_DIRECT) { |
2431 | i_size_write(inode, pos); | 2053 | pos += copied; |
2432 | mark_inode_dirty(inode); | 2054 | if (pos > i_size_read(inode)) { |
2433 | } | 2055 | i_size_write(inode, pos); |
2434 | inode->i_blocks = ocfs2_inode_sector_count(inode); | 2056 | mark_inode_dirty(inode); |
2435 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); | 2057 | } |
2436 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2058 | inode->i_blocks = ocfs2_inode_sector_count(inode); |
2437 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 2059 | di->i_size = cpu_to_le64((u64)i_size_read(inode)); |
2438 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 2060 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2439 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | 2061 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
2440 | ocfs2_journal_dirty(handle, wc->w_di_bh); | 2062 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
2063 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
2064 | } | ||
2065 | if (handle) | ||
2066 | ocfs2_journal_dirty(handle, wc->w_di_bh); | ||
2441 | 2067 | ||
2442 | out: | 2068 | out: |
2443 | /* unlock pages before dealloc since it needs acquiring j_trans_barrier | 2069 | /* unlock pages before dealloc since it needs acquiring j_trans_barrier |
@@ -2447,7 +2073,8 @@ out: | |||
2447 | */ | 2073 | */ |
2448 | ocfs2_unlock_pages(wc); | 2074 | ocfs2_unlock_pages(wc); |
2449 | 2075 | ||
2450 | ocfs2_commit_trans(osb, handle); | 2076 | if (handle) |
2077 | ocfs2_commit_trans(osb, handle); | ||
2451 | 2078 | ||
2452 | ocfs2_run_deallocs(osb, &wc->w_dealloc); | 2079 | ocfs2_run_deallocs(osb, &wc->w_dealloc); |
2453 | 2080 | ||
@@ -2472,6 +2099,360 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping, | |||
2472 | return ret; | 2099 | return ret; |
2473 | } | 2100 | } |
2474 | 2101 | ||
2102 | struct ocfs2_dio_write_ctxt { | ||
2103 | struct list_head dw_zero_list; | ||
2104 | unsigned dw_zero_count; | ||
2105 | int dw_orphaned; | ||
2106 | pid_t dw_writer_pid; | ||
2107 | }; | ||
2108 | |||
2109 | static struct ocfs2_dio_write_ctxt * | ||
2110 | ocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc) | ||
2111 | { | ||
2112 | struct ocfs2_dio_write_ctxt *dwc = NULL; | ||
2113 | |||
2114 | if (bh->b_private) | ||
2115 | return bh->b_private; | ||
2116 | |||
2117 | dwc = kmalloc(sizeof(struct ocfs2_dio_write_ctxt), GFP_NOFS); | ||
2118 | if (dwc == NULL) | ||
2119 | return NULL; | ||
2120 | INIT_LIST_HEAD(&dwc->dw_zero_list); | ||
2121 | dwc->dw_zero_count = 0; | ||
2122 | dwc->dw_orphaned = 0; | ||
2123 | dwc->dw_writer_pid = task_pid_nr(current); | ||
2124 | bh->b_private = dwc; | ||
2125 | *alloc = 1; | ||
2126 | |||
2127 | return dwc; | ||
2128 | } | ||
2129 | |||
2130 | static void ocfs2_dio_free_write_ctx(struct inode *inode, | ||
2131 | struct ocfs2_dio_write_ctxt *dwc) | ||
2132 | { | ||
2133 | ocfs2_free_unwritten_list(inode, &dwc->dw_zero_list); | ||
2134 | kfree(dwc); | ||
2135 | } | ||
2136 | |||
2137 | /* | ||
2138 | * TODO: Make this into a generic get_blocks function. | ||
2139 | * | ||
2140 | * From do_direct_io in direct-io.c: | ||
2141 | * "So what we do is to permit the ->get_blocks function to populate | ||
2142 | * bh.b_size with the size of IO which is permitted at this offset and | ||
2143 | * this i_blkbits." | ||
2144 | * | ||
2145 | * This function is called directly from get_more_blocks in direct-io.c. | ||
2146 | * | ||
2147 | * called like this: dio->get_blocks(dio->inode, fs_startblk, | ||
2148 | * fs_count, map_bh, dio->rw == WRITE); | ||
2149 | */ | ||
2150 | static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock, | ||
2151 | struct buffer_head *bh_result, int create) | ||
2152 | { | ||
2153 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2154 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2155 | struct ocfs2_write_ctxt *wc; | ||
2156 | struct ocfs2_write_cluster_desc *desc = NULL; | ||
2157 | struct ocfs2_dio_write_ctxt *dwc = NULL; | ||
2158 | struct buffer_head *di_bh = NULL; | ||
2159 | u64 p_blkno; | ||
2160 | loff_t pos = iblock << inode->i_sb->s_blocksize_bits; | ||
2161 | unsigned len, total_len = bh_result->b_size; | ||
2162 | int ret = 0, first_get_block = 0; | ||
2163 | |||
2164 | len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); | ||
2165 | len = min(total_len, len); | ||
2166 | |||
2167 | mlog(0, "get block of %lu at %llu:%u req %u\n", | ||
2168 | inode->i_ino, pos, len, total_len); | ||
2169 | |||
2170 | /* | ||
2171 | * Because we need to change file size in ocfs2_dio_end_io_write(), or | ||
2172 | * we may need to add it to orphan dir. So can not fall to fast path | ||
2173 | * while file size will be changed. | ||
2174 | */ | ||
2175 | if (pos + total_len <= i_size_read(inode)) { | ||
2176 | down_read(&oi->ip_alloc_sem); | ||
2177 | /* This is the fast path for re-write. */ | ||
2178 | ret = ocfs2_get_block(inode, iblock, bh_result, create); | ||
2179 | |||
2180 | up_read(&oi->ip_alloc_sem); | ||
2181 | |||
2182 | if (buffer_mapped(bh_result) && | ||
2183 | !buffer_new(bh_result) && | ||
2184 | ret == 0) | ||
2185 | goto out; | ||
2186 | |||
2187 | /* Clear state set by ocfs2_get_block. */ | ||
2188 | bh_result->b_state = 0; | ||
2189 | } | ||
2190 | |||
2191 | dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block); | ||
2192 | if (unlikely(dwc == NULL)) { | ||
2193 | ret = -ENOMEM; | ||
2194 | mlog_errno(ret); | ||
2195 | goto out; | ||
2196 | } | ||
2197 | |||
2198 | if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) > | ||
2199 | ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) && | ||
2200 | !dwc->dw_orphaned) { | ||
2201 | /* | ||
2202 | * when we are going to alloc extents beyond file size, add the | ||
2203 | * inode to orphan dir, so we can recall those spaces when | ||
2204 | * system crashed during write. | ||
2205 | */ | ||
2206 | ret = ocfs2_add_inode_to_orphan(osb, inode); | ||
2207 | if (ret < 0) { | ||
2208 | mlog_errno(ret); | ||
2209 | goto out; | ||
2210 | } | ||
2211 | dwc->dw_orphaned = 1; | ||
2212 | } | ||
2213 | |||
2214 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2215 | if (ret) { | ||
2216 | mlog_errno(ret); | ||
2217 | goto out; | ||
2218 | } | ||
2219 | |||
2220 | down_write(&oi->ip_alloc_sem); | ||
2221 | |||
2222 | if (first_get_block) { | ||
2223 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
2224 | ret = ocfs2_zero_tail(inode, di_bh, pos); | ||
2225 | else | ||
2226 | ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, | ||
2227 | total_len, NULL); | ||
2228 | if (ret < 0) { | ||
2229 | mlog_errno(ret); | ||
2230 | goto unlock; | ||
2231 | } | ||
2232 | } | ||
2233 | |||
2234 | ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len, | ||
2235 | OCFS2_WRITE_DIRECT, NULL, | ||
2236 | (void **)&wc, di_bh, NULL); | ||
2237 | if (ret) { | ||
2238 | mlog_errno(ret); | ||
2239 | goto unlock; | ||
2240 | } | ||
2241 | |||
2242 | desc = &wc->w_desc[0]; | ||
2243 | |||
2244 | p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys); | ||
2245 | BUG_ON(p_blkno == 0); | ||
2246 | p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1); | ||
2247 | |||
2248 | map_bh(bh_result, inode->i_sb, p_blkno); | ||
2249 | bh_result->b_size = len; | ||
2250 | if (desc->c_needs_zero) | ||
2251 | set_buffer_new(bh_result); | ||
2252 | |||
2253 | /* May sleep in end_io. It should not happen in a irq context. So defer | ||
2254 | * it to dio work queue. */ | ||
2255 | set_buffer_defer_completion(bh_result); | ||
2256 | |||
2257 | if (!list_empty(&wc->w_unwritten_list)) { | ||
2258 | struct ocfs2_unwritten_extent *ue = NULL; | ||
2259 | |||
2260 | ue = list_first_entry(&wc->w_unwritten_list, | ||
2261 | struct ocfs2_unwritten_extent, | ||
2262 | ue_node); | ||
2263 | BUG_ON(ue->ue_cpos != desc->c_cpos); | ||
2264 | /* The physical address may be 0, fill it. */ | ||
2265 | ue->ue_phys = desc->c_phys; | ||
2266 | |||
2267 | list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list); | ||
2268 | dwc->dw_zero_count++; | ||
2269 | } | ||
2270 | |||
2271 | ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc); | ||
2272 | BUG_ON(ret != len); | ||
2273 | ret = 0; | ||
2274 | unlock: | ||
2275 | up_write(&oi->ip_alloc_sem); | ||
2276 | ocfs2_inode_unlock(inode, 1); | ||
2277 | brelse(di_bh); | ||
2278 | out: | ||
2279 | if (ret < 0) | ||
2280 | ret = -EIO; | ||
2281 | return ret; | ||
2282 | } | ||
2283 | |||
2284 | static void ocfs2_dio_end_io_write(struct inode *inode, | ||
2285 | struct ocfs2_dio_write_ctxt *dwc, | ||
2286 | loff_t offset, | ||
2287 | ssize_t bytes) | ||
2288 | { | ||
2289 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
2290 | struct ocfs2_extent_tree et; | ||
2291 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2292 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
2293 | struct ocfs2_unwritten_extent *ue = NULL; | ||
2294 | struct buffer_head *di_bh = NULL; | ||
2295 | struct ocfs2_dinode *di; | ||
2296 | struct ocfs2_alloc_context *data_ac = NULL; | ||
2297 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
2298 | handle_t *handle = NULL; | ||
2299 | loff_t end = offset + bytes; | ||
2300 | int ret = 0, credits = 0, locked = 0; | ||
2301 | |||
2302 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
2303 | |||
2304 | /* We do clear unwritten, delete orphan, change i_size here. If neither | ||
2305 | * of these happen, we can skip all this. */ | ||
2306 | if (list_empty(&dwc->dw_zero_list) && | ||
2307 | end <= i_size_read(inode) && | ||
2308 | !dwc->dw_orphaned) | ||
2309 | goto out; | ||
2310 | |||
2311 | /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we | ||
2312 | * are in that context. */ | ||
2313 | if (dwc->dw_writer_pid != task_pid_nr(current)) { | ||
2314 | mutex_lock(&inode->i_mutex); | ||
2315 | locked = 1; | ||
2316 | } | ||
2317 | |||
2318 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
2319 | if (ret < 0) { | ||
2320 | mlog_errno(ret); | ||
2321 | goto out; | ||
2322 | } | ||
2323 | |||
2324 | down_write(&oi->ip_alloc_sem); | ||
2325 | |||
2326 | /* Delete orphan before acquire i_mutex. */ | ||
2327 | if (dwc->dw_orphaned) { | ||
2328 | BUG_ON(dwc->dw_writer_pid != task_pid_nr(current)); | ||
2329 | |||
2330 | end = end > i_size_read(inode) ? end : 0; | ||
2331 | |||
2332 | ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, | ||
2333 | !!end, end); | ||
2334 | if (ret < 0) | ||
2335 | mlog_errno(ret); | ||
2336 | } | ||
2337 | |||
2338 | di = (struct ocfs2_dinode *)di_bh; | ||
2339 | |||
2340 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); | ||
2341 | |||
2342 | ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2, | ||
2343 | &data_ac, &meta_ac); | ||
2344 | if (ret) { | ||
2345 | mlog_errno(ret); | ||
2346 | goto unlock; | ||
2347 | } | ||
2348 | |||
2349 | credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list); | ||
2350 | |||
2351 | handle = ocfs2_start_trans(osb, credits); | ||
2352 | if (IS_ERR(handle)) { | ||
2353 | ret = PTR_ERR(handle); | ||
2354 | mlog_errno(ret); | ||
2355 | goto unlock; | ||
2356 | } | ||
2357 | ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, | ||
2358 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
2359 | if (ret) { | ||
2360 | mlog_errno(ret); | ||
2361 | goto commit; | ||
2362 | } | ||
2363 | |||
2364 | list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) { | ||
2365 | ret = ocfs2_mark_extent_written(inode, &et, handle, | ||
2366 | ue->ue_cpos, 1, | ||
2367 | ue->ue_phys, | ||
2368 | meta_ac, &dealloc); | ||
2369 | if (ret < 0) { | ||
2370 | mlog_errno(ret); | ||
2371 | break; | ||
2372 | } | ||
2373 | } | ||
2374 | |||
2375 | if (end > i_size_read(inode)) { | ||
2376 | ret = ocfs2_set_inode_size(handle, inode, di_bh, end); | ||
2377 | if (ret < 0) | ||
2378 | mlog_errno(ret); | ||
2379 | } | ||
2380 | commit: | ||
2381 | ocfs2_commit_trans(osb, handle); | ||
2382 | unlock: | ||
2383 | up_write(&oi->ip_alloc_sem); | ||
2384 | ocfs2_inode_unlock(inode, 1); | ||
2385 | brelse(di_bh); | ||
2386 | out: | ||
2387 | if (data_ac) | ||
2388 | ocfs2_free_alloc_context(data_ac); | ||
2389 | if (meta_ac) | ||
2390 | ocfs2_free_alloc_context(meta_ac); | ||
2391 | ocfs2_run_deallocs(osb, &dealloc); | ||
2392 | if (locked) | ||
2393 | mutex_unlock(&inode->i_mutex); | ||
2394 | ocfs2_dio_free_write_ctx(inode, dwc); | ||
2395 | } | ||
2396 | |||
2397 | /* | ||
2398 | * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're | ||
2399 | * particularly interested in the aio/dio case. We use the rw_lock DLM lock | ||
2400 | * to protect io on one node from truncation on another. | ||
2401 | */ | ||
2402 | static int ocfs2_dio_end_io(struct kiocb *iocb, | ||
2403 | loff_t offset, | ||
2404 | ssize_t bytes, | ||
2405 | void *private) | ||
2406 | { | ||
2407 | struct inode *inode = file_inode(iocb->ki_filp); | ||
2408 | int level; | ||
2409 | |||
2410 | if (bytes <= 0) | ||
2411 | return 0; | ||
2412 | |||
2413 | /* this io's submitter should not have unlocked this before we could */ | ||
2414 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | ||
2415 | |||
2416 | if (private) | ||
2417 | ocfs2_dio_end_io_write(inode, private, offset, bytes); | ||
2418 | |||
2419 | ocfs2_iocb_clear_rw_locked(iocb); | ||
2420 | |||
2421 | level = ocfs2_iocb_rw_locked_level(iocb); | ||
2422 | ocfs2_rw_unlock(inode, level); | ||
2423 | return 0; | ||
2424 | } | ||
2425 | |||
2426 | static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, | ||
2427 | loff_t offset) | ||
2428 | { | ||
2429 | struct file *file = iocb->ki_filp; | ||
2430 | struct inode *inode = file_inode(file)->i_mapping->host; | ||
2431 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
2432 | loff_t end = offset + iter->count; | ||
2433 | get_block_t *get_block; | ||
2434 | |||
2435 | /* | ||
2436 | * Fallback to buffered I/O if we see an inode without | ||
2437 | * extents. | ||
2438 | */ | ||
2439 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
2440 | return 0; | ||
2441 | |||
2442 | /* Fallback to buffered I/O if we do not support append dio. */ | ||
2443 | if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb)) | ||
2444 | return 0; | ||
2445 | |||
2446 | if (iov_iter_rw(iter) == READ) | ||
2447 | get_block = ocfs2_get_block; | ||
2448 | else | ||
2449 | get_block = ocfs2_dio_get_block; | ||
2450 | |||
2451 | return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, | ||
2452 | iter, offset, get_block, | ||
2453 | ocfs2_dio_end_io, NULL, 0); | ||
2454 | } | ||
2455 | |||
2475 | const struct address_space_operations ocfs2_aops = { | 2456 | const struct address_space_operations ocfs2_aops = { |
2476 | .readpage = ocfs2_readpage, | 2457 | .readpage = ocfs2_readpage, |
2477 | .readpages = ocfs2_readpages, | 2458 | .readpages = ocfs2_readpages, |