diff options
author | Andreas Gruenbacher <agruenba@redhat.com> | 2018-06-19 10:08:02 -0400 |
---|---|---|
committer | Andreas Gruenbacher <agruenba@redhat.com> | 2018-07-02 11:27:32 -0400 |
commit | 967bcc91b044936e85dbb5848952dc1335a846f4 (patch) | |
tree | 730618d2da59b6097f6799c17ad2a4effb9ca34a | |
parent | bcfe94139a45fae128844558d6e27a0258860a90 (diff) |
gfs2: iomap direct I/O support
The page unmapping previously done in gfs2_direct_IO is now done
generically in iomap_dio_rw.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Bob Peterson <rpeterso@redhat.com>
-rw-r--r-- | fs/gfs2/aops.c | 100 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 14 | ||||
-rw-r--r-- | fs/gfs2/file.c | 132 |
3 files changed, 136 insertions, 110 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ecfbca9c88ff..1054cc4a96db 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, | |||
84 | return 0; | 84 | return 0; |
85 | } | 85 | } |
86 | 86 | ||
87 | static int gfs2_get_block_direct(struct inode *inode, sector_t lblock, | ||
88 | struct buffer_head *bh_result, int create) | ||
89 | { | ||
90 | return gfs2_block_map(inode, lblock, bh_result, 0); | ||
91 | } | ||
92 | |||
93 | /** | 87 | /** |
94 | * gfs2_writepage_common - Common bits of writepage | 88 | * gfs2_writepage_common - Common bits of writepage |
95 | * @page: The page to be written | 89 | * @page: The page to be written |
@@ -1025,96 +1019,6 @@ out: | |||
1025 | } | 1019 | } |
1026 | 1020 | ||
1027 | /** | 1021 | /** |
1028 | * gfs2_ok_for_dio - check that dio is valid on this file | ||
1029 | * @ip: The inode | ||
1030 | * @offset: The offset at which we are reading or writing | ||
1031 | * | ||
1032 | * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o) | ||
1033 | * 1 (to accept the i/o request) | ||
1034 | */ | ||
1035 | static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset) | ||
1036 | { | ||
1037 | /* | ||
1038 | * Should we return an error here? I can't see that O_DIRECT for | ||
1039 | * a stuffed file makes any sense. For now we'll silently fall | ||
1040 | * back to buffered I/O | ||
1041 | */ | ||
1042 | if (gfs2_is_stuffed(ip)) | ||
1043 | return 0; | ||
1044 | |||
1045 | if (offset >= i_size_read(&ip->i_inode)) | ||
1046 | return 0; | ||
1047 | return 1; | ||
1048 | } | ||
1049 | |||
1050 | |||
1051 | |||
1052 | static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | ||
1053 | { | ||
1054 | struct file *file = iocb->ki_filp; | ||
1055 | struct inode *inode = file->f_mapping->host; | ||
1056 | struct address_space *mapping = inode->i_mapping; | ||
1057 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1058 | loff_t offset = iocb->ki_pos; | ||
1059 | struct gfs2_holder gh; | ||
1060 | int rv; | ||
1061 | |||
1062 | /* | ||
1063 | * Deferred lock, even if it's a write, since we do no allocation | ||
1064 | * on this path. All we need change is atime, and this lock mode | ||
1065 | * ensures that other nodes have flushed their buffered read caches | ||
1066 | * (i.e. their page cache entries for this inode). We do not, | ||
1067 | * unfortunately have the option of only flushing a range like | ||
1068 | * the VFS does. | ||
1069 | */ | ||
1070 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); | ||
1071 | rv = gfs2_glock_nq(&gh); | ||
1072 | if (rv) | ||
1073 | goto out_uninit; | ||
1074 | rv = gfs2_ok_for_dio(ip, offset); | ||
1075 | if (rv != 1) | ||
1076 | goto out; /* dio not valid, fall back to buffered i/o */ | ||
1077 | |||
1078 | /* | ||
1079 | * Now since we are holding a deferred (CW) lock at this point, you | ||
1080 | * might be wondering why this is ever needed. There is a case however | ||
1081 | * where we've granted a deferred local lock against a cached exclusive | ||
1082 | * glock. That is ok provided all granted local locks are deferred, but | ||
1083 | * it also means that it is possible to encounter pages which are | ||
1084 | * cached and possibly also mapped. So here we check for that and sort | ||
1085 | * them out ahead of the dio. The glock state machine will take care of | ||
1086 | * everything else. | ||
1087 | * | ||
1088 | * If in fact the cached glock state (gl->gl_state) is deferred (CW) in | ||
1089 | * the first place, mapping->nr_pages will always be zero. | ||
1090 | */ | ||
1091 | if (mapping->nrpages) { | ||
1092 | loff_t lstart = offset & ~(PAGE_SIZE - 1); | ||
1093 | loff_t len = iov_iter_count(iter); | ||
1094 | loff_t end = PAGE_ALIGN(offset + len) - 1; | ||
1095 | |||
1096 | rv = 0; | ||
1097 | if (len == 0) | ||
1098 | goto out; | ||
1099 | if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) | ||
1100 | unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); | ||
1101 | rv = filemap_write_and_wait_range(mapping, lstart, end); | ||
1102 | if (rv) | ||
1103 | goto out; | ||
1104 | if (iov_iter_rw(iter) == WRITE) | ||
1105 | truncate_inode_pages_range(mapping, lstart, end); | ||
1106 | } | ||
1107 | |||
1108 | rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, | ||
1109 | gfs2_get_block_direct, NULL, NULL, 0); | ||
1110 | out: | ||
1111 | gfs2_glock_dq(&gh); | ||
1112 | out_uninit: | ||
1113 | gfs2_holder_uninit(&gh); | ||
1114 | return rv; | ||
1115 | } | ||
1116 | |||
1117 | /** | ||
1118 | * gfs2_releasepage - free the metadata associated with a page | 1022 | * gfs2_releasepage - free the metadata associated with a page |
1119 | * @page: the page that's being released | 1023 | * @page: the page that's being released |
1120 | * @gfp_mask: passed from Linux VFS, ignored by us | 1024 | * @gfp_mask: passed from Linux VFS, ignored by us |
@@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = { | |||
1194 | .bmap = gfs2_bmap, | 1098 | .bmap = gfs2_bmap, |
1195 | .invalidatepage = gfs2_invalidatepage, | 1099 | .invalidatepage = gfs2_invalidatepage, |
1196 | .releasepage = gfs2_releasepage, | 1100 | .releasepage = gfs2_releasepage, |
1197 | .direct_IO = gfs2_direct_IO, | 1101 | .direct_IO = noop_direct_IO, |
1198 | .migratepage = buffer_migrate_page, | 1102 | .migratepage = buffer_migrate_page, |
1199 | .is_partially_uptodate = block_is_partially_uptodate, | 1103 | .is_partially_uptodate = block_is_partially_uptodate, |
1200 | .error_remove_page = generic_error_remove_page, | 1104 | .error_remove_page = generic_error_remove_page, |
@@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = { | |||
1211 | .bmap = gfs2_bmap, | 1115 | .bmap = gfs2_bmap, |
1212 | .invalidatepage = gfs2_invalidatepage, | 1116 | .invalidatepage = gfs2_invalidatepage, |
1213 | .releasepage = gfs2_releasepage, | 1117 | .releasepage = gfs2_releasepage, |
1214 | .direct_IO = gfs2_direct_IO, | 1118 | .direct_IO = noop_direct_IO, |
1215 | .migratepage = buffer_migrate_page, | 1119 | .migratepage = buffer_migrate_page, |
1216 | .is_partially_uptodate = block_is_partially_uptodate, | 1120 | .is_partially_uptodate = block_is_partially_uptodate, |
1217 | .error_remove_page = generic_error_remove_page, | 1121 | .error_remove_page = generic_error_remove_page, |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8b5876e19ecf..29391090d5b7 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -915,6 +915,9 @@ do_alloc: | |||
915 | } else if (flags & IOMAP_WRITE) { | 915 | } else if (flags & IOMAP_WRITE) { |
916 | u64 alloc_size; | 916 | u64 alloc_size; |
917 | 917 | ||
918 | if (flags & IOMAP_DIRECT) | ||
919 | goto out; /* (see gfs2_file_direct_write) */ | ||
920 | |||
918 | len = gfs2_alloc_size(inode, mp, len); | 921 | len = gfs2_alloc_size(inode, mp, len); |
919 | alloc_size = len << inode->i_blkbits; | 922 | alloc_size = len << inode->i_blkbits; |
920 | if (alloc_size < iomap->length) | 923 | if (alloc_size < iomap->length) |
@@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, | |||
1082 | int ret; | 1085 | int ret; |
1083 | 1086 | ||
1084 | trace_gfs2_iomap_start(ip, pos, length, flags); | 1087 | trace_gfs2_iomap_start(ip, pos, length, flags); |
1085 | if (flags & IOMAP_WRITE) { | 1088 | if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) { |
1086 | ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); | 1089 | ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); |
1087 | } else { | 1090 | } else { |
1088 | ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); | 1091 | ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); |
1089 | release_metapath(&mp); | 1092 | release_metapath(&mp); |
1093 | /* | ||
1094 | * Silently fall back to buffered I/O for stuffed files or if | ||
1095 | * we've hit a hole (see gfs2_file_direct_write). | ||
1096 | */ | ||
1097 | if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) && | ||
1098 | iomap->type != IOMAP_MAPPED) | ||
1099 | ret = -ENOTBLK; | ||
1090 | } | 1100 | } |
1091 | trace_gfs2_iomap_end(ip, iomap, ret); | 1101 | trace_gfs2_iomap_end(ip, iomap, ret); |
1092 | return ret; | 1102 | return ret; |
@@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, | |||
1100 | struct gfs2_trans *tr = current->journal_info; | 1110 | struct gfs2_trans *tr = current->journal_info; |
1101 | struct buffer_head *dibh = iomap->private; | 1111 | struct buffer_head *dibh = iomap->private; |
1102 | 1112 | ||
1103 | if (!(flags & IOMAP_WRITE)) | 1113 | if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) |
1104 | goto out; | 1114 | goto out; |
1105 | 1115 | ||
1106 | if (iomap->type != IOMAP_INLINE) { | 1116 | if (iomap->type != IOMAP_INLINE) { |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 16dd395479a5..89280515169e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, | |||
690 | return ret ? ret : ret1; | 690 | return ret ? ret : ret1; |
691 | } | 691 | } |
692 | 692 | ||
693 | static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to) | ||
694 | { | ||
695 | struct file *file = iocb->ki_filp; | ||
696 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | ||
697 | size_t count = iov_iter_count(to); | ||
698 | struct gfs2_holder gh; | ||
699 | ssize_t ret; | ||
700 | |||
701 | if (!count) | ||
702 | return 0; /* skip atime */ | ||
703 | |||
704 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); | ||
705 | ret = gfs2_glock_nq(&gh); | ||
706 | if (ret) | ||
707 | goto out_uninit; | ||
708 | |||
709 | /* fall back to buffered I/O for stuffed files */ | ||
710 | ret = -ENOTBLK; | ||
711 | if (gfs2_is_stuffed(ip)) | ||
712 | goto out; | ||
713 | |||
714 | ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL); | ||
715 | |||
716 | out: | ||
717 | gfs2_glock_dq(&gh); | ||
718 | out_uninit: | ||
719 | gfs2_holder_uninit(&gh); | ||
720 | return ret; | ||
721 | } | ||
722 | |||
723 | static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from) | ||
724 | { | ||
725 | struct file *file = iocb->ki_filp; | ||
726 | struct inode *inode = file->f_mapping->host; | ||
727 | struct gfs2_inode *ip = GFS2_I(inode); | ||
728 | size_t len = iov_iter_count(from); | ||
729 | loff_t offset = iocb->ki_pos; | ||
730 | struct gfs2_holder gh; | ||
731 | ssize_t ret; | ||
732 | |||
733 | /* | ||
734 | * Deferred lock, even if it's a write, since we do no allocation on | ||
735 | * this path. All we need to change is the atime, and this lock mode | ||
736 | * ensures that other nodes have flushed their buffered read caches | ||
737 | * (i.e. their page cache entries for this inode). We do not, | ||
738 | * unfortunately, have the option of only flushing a range like the | ||
739 | * VFS does. | ||
740 | */ | ||
741 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); | ||
742 | ret = gfs2_glock_nq(&gh); | ||
743 | if (ret) | ||
744 | goto out_uninit; | ||
745 | |||
746 | /* Silently fall back to buffered I/O when writing beyond EOF */ | ||
747 | if (offset + len > i_size_read(&ip->i_inode)) | ||
748 | goto out; | ||
749 | |||
750 | ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL); | ||
751 | |||
752 | out: | ||
753 | gfs2_glock_dq(&gh); | ||
754 | out_uninit: | ||
755 | gfs2_holder_uninit(&gh); | ||
756 | return ret; | ||
757 | } | ||
758 | |||
759 | static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to) | ||
760 | { | ||
761 | ssize_t ret; | ||
762 | |||
763 | if (iocb->ki_flags & IOCB_DIRECT) { | ||
764 | ret = gfs2_file_direct_read(iocb, to); | ||
765 | if (likely(ret != -ENOTBLK)) | ||
766 | return ret; | ||
767 | iocb->ki_flags &= ~IOCB_DIRECT; | ||
768 | } | ||
769 | return generic_file_read_iter(iocb, to); | ||
770 | } | ||
771 | |||
693 | /** | 772 | /** |
694 | * gfs2_file_write_iter - Perform a write to a file | 773 | * gfs2_file_write_iter - Perform a write to a file |
695 | * @iocb: The io context | 774 | * @iocb: The io context |
@@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
707 | struct file *file = iocb->ki_filp; | 786 | struct file *file = iocb->ki_filp; |
708 | struct inode *inode = file_inode(file); | 787 | struct inode *inode = file_inode(file); |
709 | struct gfs2_inode *ip = GFS2_I(inode); | 788 | struct gfs2_inode *ip = GFS2_I(inode); |
710 | ssize_t ret; | 789 | ssize_t written = 0, ret; |
711 | 790 | ||
712 | ret = gfs2_rsqa_alloc(ip); | 791 | ret = gfs2_rsqa_alloc(ip); |
713 | if (ret) | 792 | if (ret) |
@@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
724 | gfs2_glock_dq_uninit(&gh); | 803 | gfs2_glock_dq_uninit(&gh); |
725 | } | 804 | } |
726 | 805 | ||
727 | if (iocb->ki_flags & IOCB_DIRECT) | ||
728 | return generic_file_write_iter(iocb, from); | ||
729 | |||
730 | inode_lock(inode); | 806 | inode_lock(inode); |
731 | ret = generic_write_checks(iocb, from); | 807 | ret = generic_write_checks(iocb, from); |
732 | if (ret <= 0) | 808 | if (ret <= 0) |
@@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) | |||
743 | if (ret) | 819 | if (ret) |
744 | goto out2; | 820 | goto out2; |
745 | 821 | ||
746 | ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); | 822 | if (iocb->ki_flags & IOCB_DIRECT) { |
823 | struct address_space *mapping = file->f_mapping; | ||
824 | loff_t pos, endbyte; | ||
825 | ssize_t buffered; | ||
826 | |||
827 | written = gfs2_file_direct_write(iocb, from); | ||
828 | if (written < 0 || !iov_iter_count(from)) | ||
829 | goto out2; | ||
830 | |||
831 | ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); | ||
832 | if (unlikely(ret < 0)) | ||
833 | goto out2; | ||
834 | buffered = ret; | ||
835 | |||
836 | /* | ||
837 | * We need to ensure that the page cache pages are written to | ||
838 | * disk and invalidated to preserve the expected O_DIRECT | ||
839 | * semantics. | ||
840 | */ | ||
841 | pos = iocb->ki_pos; | ||
842 | endbyte = pos + buffered - 1; | ||
843 | ret = filemap_write_and_wait_range(mapping, pos, endbyte); | ||
844 | if (!ret) { | ||
845 | iocb->ki_pos += buffered; | ||
846 | written += buffered; | ||
847 | invalidate_mapping_pages(mapping, | ||
848 | pos >> PAGE_SHIFT, | ||
849 | endbyte >> PAGE_SHIFT); | ||
850 | } else { | ||
851 | /* | ||
852 | * We don't know how much we wrote, so just return | ||
853 | * the number of bytes which were direct-written | ||
854 | */ | ||
855 | } | ||
856 | } else { | ||
857 | ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); | ||
858 | if (likely(ret > 0)) | ||
859 | iocb->ki_pos += ret; | ||
860 | } | ||
747 | 861 | ||
748 | out2: | 862 | out2: |
749 | current->backing_dev_info = NULL; | 863 | current->backing_dev_info = NULL; |
750 | out: | 864 | out: |
751 | inode_unlock(inode); | 865 | inode_unlock(inode); |
752 | if (likely(ret > 0)) { | 866 | if (likely(ret > 0)) { |
753 | iocb->ki_pos += ret; | ||
754 | |||
755 | /* Handle various SYNC-type writes */ | 867 | /* Handle various SYNC-type writes */ |
756 | ret = generic_write_sync(iocb, ret); | 868 | ret = generic_write_sync(iocb, ret); |
757 | } | 869 | } |
758 | return ret; | 870 | return written ? written : ret; |
759 | } | 871 | } |
760 | 872 | ||
761 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | 873 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, |
@@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
1157 | 1269 | ||
1158 | const struct file_operations gfs2_file_fops = { | 1270 | const struct file_operations gfs2_file_fops = { |
1159 | .llseek = gfs2_llseek, | 1271 | .llseek = gfs2_llseek, |
1160 | .read_iter = generic_file_read_iter, | 1272 | .read_iter = gfs2_file_read_iter, |
1161 | .write_iter = gfs2_file_write_iter, | 1273 | .write_iter = gfs2_file_write_iter, |
1162 | .unlocked_ioctl = gfs2_ioctl, | 1274 | .unlocked_ioctl = gfs2_ioctl, |
1163 | .mmap = gfs2_mmap, | 1275 | .mmap = gfs2_mmap, |
@@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = { | |||
1187 | 1299 | ||
1188 | const struct file_operations gfs2_file_fops_nolock = { | 1300 | const struct file_operations gfs2_file_fops_nolock = { |
1189 | .llseek = gfs2_llseek, | 1301 | .llseek = gfs2_llseek, |
1190 | .read_iter = generic_file_read_iter, | 1302 | .read_iter = gfs2_file_read_iter, |
1191 | .write_iter = gfs2_file_write_iter, | 1303 | .write_iter = gfs2_file_write_iter, |
1192 | .unlocked_ioctl = gfs2_ioctl, | 1304 | .unlocked_ioctl = gfs2_ioctl, |
1193 | .mmap = gfs2_mmap, | 1305 | .mmap = gfs2_mmap, |