aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Gruenbacher <agruenba@redhat.com>2018-06-19 10:08:02 -0400
committerAndreas Gruenbacher <agruenba@redhat.com>2018-07-02 11:27:32 -0400
commit967bcc91b044936e85dbb5848952dc1335a846f4 (patch)
tree730618d2da59b6097f6799c17ad2a4effb9ca34a
parentbcfe94139a45fae128844558d6e27a0258860a90 (diff)
gfs2: iomap direct I/O support
The page unmapping previously done in gfs2_direct_IO is now done generically in iomap_dio_rw. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Reviewed-by: Bob Peterson <rpeterso@redhat.com>
-rw-r--r--fs/gfs2/aops.c100
-rw-r--r--fs/gfs2/bmap.c14
-rw-r--r--fs/gfs2/file.c132
3 files changed, 136 insertions, 110 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ecfbca9c88ff..1054cc4a96db 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -84,12 +84,6 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
84 return 0; 84 return 0;
85} 85}
86 86
87static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
88 struct buffer_head *bh_result, int create)
89{
90 return gfs2_block_map(inode, lblock, bh_result, 0);
91}
92
93/** 87/**
94 * gfs2_writepage_common - Common bits of writepage 88 * gfs2_writepage_common - Common bits of writepage
95 * @page: The page to be written 89 * @page: The page to be written
@@ -1025,96 +1019,6 @@ out:
1025} 1019}
1026 1020
1027/** 1021/**
1028 * gfs2_ok_for_dio - check that dio is valid on this file
1029 * @ip: The inode
1030 * @offset: The offset at which we are reading or writing
1031 *
1032 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
1033 * 1 (to accept the i/o request)
1034 */
1035static int gfs2_ok_for_dio(struct gfs2_inode *ip, loff_t offset)
1036{
1037 /*
1038 * Should we return an error here? I can't see that O_DIRECT for
1039 * a stuffed file makes any sense. For now we'll silently fall
1040 * back to buffered I/O
1041 */
1042 if (gfs2_is_stuffed(ip))
1043 return 0;
1044
1045 if (offset >= i_size_read(&ip->i_inode))
1046 return 0;
1047 return 1;
1048}
1049
1050
1051
1052static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1053{
1054 struct file *file = iocb->ki_filp;
1055 struct inode *inode = file->f_mapping->host;
1056 struct address_space *mapping = inode->i_mapping;
1057 struct gfs2_inode *ip = GFS2_I(inode);
1058 loff_t offset = iocb->ki_pos;
1059 struct gfs2_holder gh;
1060 int rv;
1061
1062 /*
1063	 * Deferred lock, even if it's a write, since we do no allocation
1064 * on this path. All we need change is atime, and this lock mode
1065 * ensures that other nodes have flushed their buffered read caches
1066 * (i.e. their page cache entries for this inode). We do not,
1067 * unfortunately have the option of only flushing a range like
1068 * the VFS does.
1069 */
1070 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
1071 rv = gfs2_glock_nq(&gh);
1072 if (rv)
1073 goto out_uninit;
1074 rv = gfs2_ok_for_dio(ip, offset);
1075 if (rv != 1)
1076 goto out; /* dio not valid, fall back to buffered i/o */
1077
1078 /*
1079 * Now since we are holding a deferred (CW) lock at this point, you
1080 * might be wondering why this is ever needed. There is a case however
1081 * where we've granted a deferred local lock against a cached exclusive
1082 * glock. That is ok provided all granted local locks are deferred, but
1083 * it also means that it is possible to encounter pages which are
1084 * cached and possibly also mapped. So here we check for that and sort
1085 * them out ahead of the dio. The glock state machine will take care of
1086 * everything else.
1087 *
1088 * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
1089 * the first place, mapping->nr_pages will always be zero.
1090 */
1091 if (mapping->nrpages) {
1092 loff_t lstart = offset & ~(PAGE_SIZE - 1);
1093 loff_t len = iov_iter_count(iter);
1094 loff_t end = PAGE_ALIGN(offset + len) - 1;
1095
1096 rv = 0;
1097 if (len == 0)
1098 goto out;
1099 if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
1100 unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
1101 rv = filemap_write_and_wait_range(mapping, lstart, end);
1102 if (rv)
1103 goto out;
1104 if (iov_iter_rw(iter) == WRITE)
1105 truncate_inode_pages_range(mapping, lstart, end);
1106 }
1107
1108 rv = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
1109 gfs2_get_block_direct, NULL, NULL, 0);
1110out:
1111 gfs2_glock_dq(&gh);
1112out_uninit:
1113 gfs2_holder_uninit(&gh);
1114 return rv;
1115}
1116
1117/**
1118 * gfs2_releasepage - free the metadata associated with a page 1022 * gfs2_releasepage - free the metadata associated with a page
1119 * @page: the page that's being released 1023 * @page: the page that's being released
1120 * @gfp_mask: passed from Linux VFS, ignored by us 1024 * @gfp_mask: passed from Linux VFS, ignored by us
@@ -1194,7 +1098,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
1194 .bmap = gfs2_bmap, 1098 .bmap = gfs2_bmap,
1195 .invalidatepage = gfs2_invalidatepage, 1099 .invalidatepage = gfs2_invalidatepage,
1196 .releasepage = gfs2_releasepage, 1100 .releasepage = gfs2_releasepage,
1197 .direct_IO = gfs2_direct_IO, 1101 .direct_IO = noop_direct_IO,
1198 .migratepage = buffer_migrate_page, 1102 .migratepage = buffer_migrate_page,
1199 .is_partially_uptodate = block_is_partially_uptodate, 1103 .is_partially_uptodate = block_is_partially_uptodate,
1200 .error_remove_page = generic_error_remove_page, 1104 .error_remove_page = generic_error_remove_page,
@@ -1211,7 +1115,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
1211 .bmap = gfs2_bmap, 1115 .bmap = gfs2_bmap,
1212 .invalidatepage = gfs2_invalidatepage, 1116 .invalidatepage = gfs2_invalidatepage,
1213 .releasepage = gfs2_releasepage, 1117 .releasepage = gfs2_releasepage,
1214 .direct_IO = gfs2_direct_IO, 1118 .direct_IO = noop_direct_IO,
1215 .migratepage = buffer_migrate_page, 1119 .migratepage = buffer_migrate_page,
1216 .is_partially_uptodate = block_is_partially_uptodate, 1120 .is_partially_uptodate = block_is_partially_uptodate,
1217 .error_remove_page = generic_error_remove_page, 1121 .error_remove_page = generic_error_remove_page,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8b5876e19ecf..29391090d5b7 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -915,6 +915,9 @@ do_alloc:
915 } else if (flags & IOMAP_WRITE) { 915 } else if (flags & IOMAP_WRITE) {
916 u64 alloc_size; 916 u64 alloc_size;
917 917
918 if (flags & IOMAP_DIRECT)
919 goto out; /* (see gfs2_file_direct_write) */
920
918 len = gfs2_alloc_size(inode, mp, len); 921 len = gfs2_alloc_size(inode, mp, len);
919 alloc_size = len << inode->i_blkbits; 922 alloc_size = len << inode->i_blkbits;
920 if (alloc_size < iomap->length) 923 if (alloc_size < iomap->length)
@@ -1082,11 +1085,18 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
1082 int ret; 1085 int ret;
1083 1086
1084 trace_gfs2_iomap_start(ip, pos, length, flags); 1087 trace_gfs2_iomap_start(ip, pos, length, flags);
1085 if (flags & IOMAP_WRITE) { 1088 if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
1086 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap); 1089 ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
1087 } else { 1090 } else {
1088 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp); 1091 ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
1089 release_metapath(&mp); 1092 release_metapath(&mp);
1093 /*
1094 * Silently fall back to buffered I/O for stuffed files or if
1094	 * we've hit a hole (see gfs2_file_direct_write).
1096 */
1097 if ((flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT) &&
1098 iomap->type != IOMAP_MAPPED)
1099 ret = -ENOTBLK;
1090 } 1100 }
1091 trace_gfs2_iomap_end(ip, iomap, ret); 1101 trace_gfs2_iomap_end(ip, iomap, ret);
1092 return ret; 1102 return ret;
@@ -1100,7 +1110,7 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1100 struct gfs2_trans *tr = current->journal_info; 1110 struct gfs2_trans *tr = current->journal_info;
1101 struct buffer_head *dibh = iomap->private; 1111 struct buffer_head *dibh = iomap->private;
1102 1112
1103 if (!(flags & IOMAP_WRITE)) 1113 if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
1104 goto out; 1114 goto out;
1105 1115
1106 if (iomap->type != IOMAP_INLINE) { 1116 if (iomap->type != IOMAP_INLINE) {
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 16dd395479a5..89280515169e 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -690,6 +690,85 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
690 return ret ? ret : ret1; 690 return ret ? ret : ret1;
691} 691}
692 692
693static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to)
694{
695 struct file *file = iocb->ki_filp;
696 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
697 size_t count = iov_iter_count(to);
698 struct gfs2_holder gh;
699 ssize_t ret;
700
701 if (!count)
702 return 0; /* skip atime */
703
704 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
705 ret = gfs2_glock_nq(&gh);
706 if (ret)
707 goto out_uninit;
708
709 /* fall back to buffered I/O for stuffed files */
710 ret = -ENOTBLK;
711 if (gfs2_is_stuffed(ip))
712 goto out;
713
714 ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL);
715
716out:
717 gfs2_glock_dq(&gh);
718out_uninit:
719 gfs2_holder_uninit(&gh);
720 return ret;
721}
722
723static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
724{
725 struct file *file = iocb->ki_filp;
726 struct inode *inode = file->f_mapping->host;
727 struct gfs2_inode *ip = GFS2_I(inode);
728 size_t len = iov_iter_count(from);
729 loff_t offset = iocb->ki_pos;
730 struct gfs2_holder gh;
731 ssize_t ret;
732
733 /*
734	 * Deferred lock, even if it's a write, since we do no allocation on
735 * this path. All we need to change is the atime, and this lock mode
736 * ensures that other nodes have flushed their buffered read caches
737 * (i.e. their page cache entries for this inode). We do not,
738 * unfortunately, have the option of only flushing a range like the
739 * VFS does.
740 */
741 gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh);
742 ret = gfs2_glock_nq(&gh);
743 if (ret)
744 goto out_uninit;
745
746 /* Silently fall back to buffered I/O when writing beyond EOF */
747 if (offset + len > i_size_read(&ip->i_inode))
748 goto out;
749
750 ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL);
751
752out:
753 gfs2_glock_dq(&gh);
754out_uninit:
755 gfs2_holder_uninit(&gh);
756 return ret;
757}
758
759static ssize_t gfs2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
760{
761 ssize_t ret;
762
763 if (iocb->ki_flags & IOCB_DIRECT) {
764 ret = gfs2_file_direct_read(iocb, to);
765 if (likely(ret != -ENOTBLK))
766 return ret;
767 iocb->ki_flags &= ~IOCB_DIRECT;
768 }
769 return generic_file_read_iter(iocb, to);
770}
771
693/** 772/**
694 * gfs2_file_write_iter - Perform a write to a file 773 * gfs2_file_write_iter - Perform a write to a file
695 * @iocb: The io context 774 * @iocb: The io context
@@ -707,7 +786,7 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
707 struct file *file = iocb->ki_filp; 786 struct file *file = iocb->ki_filp;
708 struct inode *inode = file_inode(file); 787 struct inode *inode = file_inode(file);
709 struct gfs2_inode *ip = GFS2_I(inode); 788 struct gfs2_inode *ip = GFS2_I(inode);
710 ssize_t ret; 789 ssize_t written = 0, ret;
711 790
712 ret = gfs2_rsqa_alloc(ip); 791 ret = gfs2_rsqa_alloc(ip);
713 if (ret) 792 if (ret)
@@ -724,9 +803,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
724 gfs2_glock_dq_uninit(&gh); 803 gfs2_glock_dq_uninit(&gh);
725 } 804 }
726 805
727 if (iocb->ki_flags & IOCB_DIRECT)
728 return generic_file_write_iter(iocb, from);
729
730 inode_lock(inode); 806 inode_lock(inode);
731 ret = generic_write_checks(iocb, from); 807 ret = generic_write_checks(iocb, from);
732 if (ret <= 0) 808 if (ret <= 0)
@@ -743,19 +819,55 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
743 if (ret) 819 if (ret)
744 goto out2; 820 goto out2;
745 821
746 ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops); 822 if (iocb->ki_flags & IOCB_DIRECT) {
823 struct address_space *mapping = file->f_mapping;
824 loff_t pos, endbyte;
825 ssize_t buffered;
826
827 written = gfs2_file_direct_write(iocb, from);
828 if (written < 0 || !iov_iter_count(from))
829 goto out2;
830
831 ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
832 if (unlikely(ret < 0))
833 goto out2;
834 buffered = ret;
835
836 /*
837 * We need to ensure that the page cache pages are written to
838 * disk and invalidated to preserve the expected O_DIRECT
839 * semantics.
840 */
841 pos = iocb->ki_pos;
842 endbyte = pos + buffered - 1;
843 ret = filemap_write_and_wait_range(mapping, pos, endbyte);
844 if (!ret) {
845 iocb->ki_pos += buffered;
846 written += buffered;
847 invalidate_mapping_pages(mapping,
848 pos >> PAGE_SHIFT,
849 endbyte >> PAGE_SHIFT);
850 } else {
851 /*
852 * We don't know how much we wrote, so just return
853 * the number of bytes which were direct-written
854 */
855 }
856 } else {
857 ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
858 if (likely(ret > 0))
859 iocb->ki_pos += ret;
860 }
747 861
748out2: 862out2:
749 current->backing_dev_info = NULL; 863 current->backing_dev_info = NULL;
750out: 864out:
751 inode_unlock(inode); 865 inode_unlock(inode);
752 if (likely(ret > 0)) { 866 if (likely(ret > 0)) {
753 iocb->ki_pos += ret;
754
755 /* Handle various SYNC-type writes */ 867 /* Handle various SYNC-type writes */
756 ret = generic_write_sync(iocb, ret); 868 ret = generic_write_sync(iocb, ret);
757 } 869 }
758 return ret; 870 return written ? written : ret;
759} 871}
760 872
761static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, 873static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
@@ -1157,7 +1269,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
1157 1269
1158const struct file_operations gfs2_file_fops = { 1270const struct file_operations gfs2_file_fops = {
1159 .llseek = gfs2_llseek, 1271 .llseek = gfs2_llseek,
1160 .read_iter = generic_file_read_iter, 1272 .read_iter = gfs2_file_read_iter,
1161 .write_iter = gfs2_file_write_iter, 1273 .write_iter = gfs2_file_write_iter,
1162 .unlocked_ioctl = gfs2_ioctl, 1274 .unlocked_ioctl = gfs2_ioctl,
1163 .mmap = gfs2_mmap, 1275 .mmap = gfs2_mmap,
@@ -1187,7 +1299,7 @@ const struct file_operations gfs2_dir_fops = {
1187 1299
1188const struct file_operations gfs2_file_fops_nolock = { 1300const struct file_operations gfs2_file_fops_nolock = {
1189 .llseek = gfs2_llseek, 1301 .llseek = gfs2_llseek,
1190 .read_iter = generic_file_read_iter, 1302 .read_iter = gfs2_file_read_iter,
1191 .write_iter = gfs2_file_write_iter, 1303 .write_iter = gfs2_file_write_iter,
1192 .unlocked_ioctl = gfs2_ioctl, 1304 .unlocked_ioctl = gfs2_ioctl,
1193 .mmap = gfs2_mmap, 1305 .mmap = gfs2_mmap,