diff options
-rw-r--r-- | fs/ocfs2/file.c | 187 |
1 files changed, 137 insertions, 50 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 39b2f1653e25..b32cdb3bf7c5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/pagemap.h> | 31 | #include <linux/pagemap.h> |
32 | #include <linux/uio.h> | 32 | #include <linux/uio.h> |
33 | #include <linux/sched.h> | 33 | #include <linux/sched.h> |
34 | #include <linux/pipe_fs_i.h> | ||
34 | 35 | ||
35 | #define MLOG_MASK_PREFIX ML_INODE | 36 | #define MLOG_MASK_PREFIX ML_INODE |
36 | #include <cluster/masklog.h> | 37 | #include <cluster/masklog.h> |
@@ -943,53 +944,21 @@ out: | |||
943 | return ret; | 944 | return ret; |
944 | } | 945 | } |
945 | 946 | ||
946 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | 947 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
947 | const struct iovec *iov, | 948 | loff_t *ppos, |
948 | unsigned long nr_segs, | 949 | size_t count, |
949 | loff_t pos) | 950 | int appending) |
950 | { | 951 | { |
951 | int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; | 952 | int ret = 0, meta_level = appending; |
953 | struct inode *inode = dentry->d_inode; | ||
952 | u32 clusters; | 954 | u32 clusters; |
953 | struct file *filp = iocb->ki_filp; | ||
954 | struct inode *inode = filp->f_dentry->d_inode; | ||
955 | loff_t newsize, saved_pos; | 955 | loff_t newsize, saved_pos; |
956 | 956 | ||
957 | mlog_entry("(0x%p, %u, '%.*s')\n", filp, | ||
958 | (unsigned int)nr_segs, | ||
959 | filp->f_dentry->d_name.len, | ||
960 | filp->f_dentry->d_name.name); | ||
961 | |||
962 | /* happy write of zero bytes */ | ||
963 | if (iocb->ki_left == 0) | ||
964 | return 0; | ||
965 | |||
966 | if (!inode) { | ||
967 | mlog(0, "bad inode\n"); | ||
968 | return -EIO; | ||
969 | } | ||
970 | |||
971 | mutex_lock(&inode->i_mutex); | ||
972 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ | ||
973 | if (filp->f_flags & O_DIRECT) { | ||
974 | have_alloc_sem = 1; | ||
975 | down_read(&inode->i_alloc_sem); | ||
976 | } | ||
977 | |||
978 | /* concurrent O_DIRECT writes are allowed */ | ||
979 | rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; | ||
980 | ret = ocfs2_rw_lock(inode, rw_level); | ||
981 | if (ret < 0) { | ||
982 | rw_level = -1; | ||
983 | mlog_errno(ret); | ||
984 | goto out; | ||
985 | } | ||
986 | |||
987 | /* | 957 | /* |
988 | * We sample i_size under a read level meta lock to see if our write | 958 | * We sample i_size under a read level meta lock to see if our write |
989 | * is extending the file, if it is we back off and get a write level | 959 | * is extending the file, if it is we back off and get a write level |
990 | * meta lock. | 960 | * meta lock. |
991 | */ | 961 | */ |
992 | meta_level = (filp->f_flags & O_APPEND) ? 1 : 0; | ||
993 | for(;;) { | 962 | for(;;) { |
994 | ret = ocfs2_meta_lock(inode, NULL, meta_level); | 963 | ret = ocfs2_meta_lock(inode, NULL, meta_level); |
995 | if (ret < 0) { | 964 | if (ret < 0) { |
@@ -1007,7 +976,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
1007 | * inode. There's also the dinode i_size state which | 976 | * inode. There's also the dinode i_size state which |
1008 | * can be lost via setattr during extending writes (we | 977 | * can be lost via setattr during extending writes (we |
1009 | * set inode->i_size at the end of a write. */ | 978 | * set inode->i_size at the end of a write. */ |
1010 | if (should_remove_suid(filp->f_dentry)) { | 979 | if (should_remove_suid(dentry)) { |
1011 | if (meta_level == 0) { | 980 | if (meta_level == 0) { |
1012 | ocfs2_meta_unlock(inode, meta_level); | 981 | ocfs2_meta_unlock(inode, meta_level); |
1013 | meta_level = 1; | 982 | meta_level = 1; |
@@ -1017,19 +986,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
1017 | ret = ocfs2_write_remove_suid(inode); | 986 | ret = ocfs2_write_remove_suid(inode); |
1018 | if (ret < 0) { | 987 | if (ret < 0) { |
1019 | mlog_errno(ret); | 988 | mlog_errno(ret); |
1020 | goto out; | 989 | goto out_unlock; |
1021 | } | 990 | } |
1022 | } | 991 | } |
1023 | 992 | ||
1024 | /* work on a copy of ppos until we're sure that we won't have | 993 | /* work on a copy of ppos until we're sure that we won't have |
1025 | * to recalculate it due to relocking. */ | 994 | * to recalculate it due to relocking. */ |
1026 | if (filp->f_flags & O_APPEND) { | 995 | if (appending) { |
1027 | saved_pos = i_size_read(inode); | 996 | saved_pos = i_size_read(inode); |
1028 | mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); | 997 | mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); |
1029 | } else { | 998 | } else { |
1030 | saved_pos = iocb->ki_pos; | 999 | saved_pos = *ppos; |
1031 | } | 1000 | } |
1032 | newsize = iocb->ki_left + saved_pos; | 1001 | newsize = count + saved_pos; |
1033 | 1002 | ||
1034 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", | 1003 | mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", |
1035 | (long long) saved_pos, (long long) newsize, | 1004 | (long long) saved_pos, (long long) newsize, |
@@ -1062,19 +1031,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
1062 | if (!clusters) | 1031 | if (!clusters) |
1063 | break; | 1032 | break; |
1064 | 1033 | ||
1065 | ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); | 1034 | ret = ocfs2_extend_file(inode, NULL, newsize, count); |
1066 | if (ret < 0) { | 1035 | if (ret < 0) { |
1067 | if (ret != -ENOSPC) | 1036 | if (ret != -ENOSPC) |
1068 | mlog_errno(ret); | 1037 | mlog_errno(ret); |
1069 | goto out; | 1038 | goto out_unlock; |
1070 | } | 1039 | } |
1071 | break; | 1040 | break; |
1072 | } | 1041 | } |
1073 | 1042 | ||
1074 | /* ok, we're done with i_size and alloc work */ | 1043 | if (appending) |
1075 | iocb->ki_pos = saved_pos; | 1044 | *ppos = saved_pos; |
1045 | |||
1046 | out_unlock: | ||
1076 | ocfs2_meta_unlock(inode, meta_level); | 1047 | ocfs2_meta_unlock(inode, meta_level); |
1077 | meta_level = -1; | 1048 | |
1049 | out: | ||
1050 | return ret; | ||
1051 | } | ||
1052 | |||
1053 | static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | ||
1054 | const struct iovec *iov, | ||
1055 | unsigned long nr_segs, | ||
1056 | loff_t pos) | ||
1057 | { | ||
1058 | int ret, rw_level, have_alloc_sem = 0; | ||
1059 | struct file *filp = iocb->ki_filp; | ||
1060 | struct inode *inode = filp->f_dentry->d_inode; | ||
1061 | int appending = filp->f_flags & O_APPEND ? 1 : 0; | ||
1062 | |||
1063 | mlog_entry("(0x%p, %u, '%.*s')\n", filp, | ||
1064 | (unsigned int)nr_segs, | ||
1065 | filp->f_dentry->d_name.len, | ||
1066 | filp->f_dentry->d_name.name); | ||
1067 | |||
1068 | /* happy write of zero bytes */ | ||
1069 | if (iocb->ki_left == 0) | ||
1070 | return 0; | ||
1071 | |||
1072 | mutex_lock(&inode->i_mutex); | ||
1073 | /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ | ||
1074 | if (filp->f_flags & O_DIRECT) { | ||
1075 | have_alloc_sem = 1; | ||
1076 | down_read(&inode->i_alloc_sem); | ||
1077 | } | ||
1078 | |||
1079 | /* concurrent O_DIRECT writes are allowed */ | ||
1080 | rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1; | ||
1081 | ret = ocfs2_rw_lock(inode, rw_level); | ||
1082 | if (ret < 0) { | ||
1083 | rw_level = -1; | ||
1084 | mlog_errno(ret); | ||
1085 | goto out; | ||
1086 | } | ||
1087 | |||
1088 | ret = ocfs2_prepare_inode_for_write(filp->f_dentry, &iocb->ki_pos, | ||
1089 | iocb->ki_left, appending); | ||
1090 | if (ret < 0) { | ||
1091 | mlog_errno(ret); | ||
1092 | goto out; | ||
1093 | } | ||
1078 | 1094 | ||
1079 | /* communicate with ocfs2_dio_end_io */ | 1095 | /* communicate with ocfs2_dio_end_io */ |
1080 | ocfs2_iocb_set_rw_locked(iocb); | 1096 | ocfs2_iocb_set_rw_locked(iocb); |
@@ -1100,8 +1116,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
1100 | } | 1116 | } |
1101 | 1117 | ||
1102 | out: | 1118 | out: |
1103 | if (meta_level != -1) | ||
1104 | ocfs2_meta_unlock(inode, meta_level); | ||
1105 | if (have_alloc_sem) | 1119 | if (have_alloc_sem) |
1106 | up_read(&inode->i_alloc_sem); | 1120 | up_read(&inode->i_alloc_sem); |
1107 | if (rw_level != -1) | 1121 | if (rw_level != -1) |
@@ -1112,6 +1126,77 @@ out: | |||
1112 | return ret; | 1126 | return ret; |
1113 | } | 1127 | } |
1114 | 1128 | ||
1129 | static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | ||
1130 | struct file *out, | ||
1131 | loff_t *ppos, | ||
1132 | size_t len, | ||
1133 | unsigned int flags) | ||
1134 | { | ||
1135 | int ret; | ||
1136 | struct inode *inode = out->f_dentry->d_inode; | ||
1137 | |||
1138 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, | ||
1139 | (unsigned int)len, | ||
1140 | out->f_dentry->d_name.len, | ||
1141 | out->f_dentry->d_name.name); | ||
1142 | |||
1143 | inode_double_lock(inode, pipe->inode); | ||
1144 | |||
1145 | ret = ocfs2_rw_lock(inode, 1); | ||
1146 | if (ret < 0) { | ||
1147 | mlog_errno(ret); | ||
1148 | goto out; | ||
1149 | } | ||
1150 | |||
1151 | ret = ocfs2_prepare_inode_for_write(out->f_dentry, ppos, len, 0); | ||
1152 | if (ret < 0) { | ||
1153 | mlog_errno(ret); | ||
1154 | goto out_unlock; | ||
1155 | } | ||
1156 | |||
1157 | /* ok, we're done with i_size and alloc work */ | ||
1158 | ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); | ||
1159 | |||
1160 | out_unlock: | ||
1161 | ocfs2_rw_unlock(inode, 1); | ||
1162 | out: | ||
1163 | inode_double_unlock(inode, pipe->inode); | ||
1164 | |||
1165 | mlog_exit(ret); | ||
1166 | return ret; | ||
1167 | } | ||
1168 | |||
1169 | static ssize_t ocfs2_file_splice_read(struct file *in, | ||
1170 | loff_t *ppos, | ||
1171 | struct pipe_inode_info *pipe, | ||
1172 | size_t len, | ||
1173 | unsigned int flags) | ||
1174 | { | ||
1175 | int ret = 0; | ||
1176 | struct inode *inode = in->f_dentry->d_inode; | ||
1177 | |||
1178 | mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, | ||
1179 | (unsigned int)len, | ||
1180 | in->f_dentry->d_name.len, | ||
1181 | in->f_dentry->d_name.name); | ||
1182 | |||
1183 | /* | ||
1184 | * See the comment in ocfs2_file_aio_read() | ||
1185 | */ | ||
1186 | ret = ocfs2_meta_lock(inode, NULL, 0); | ||
1187 | if (ret < 0) { | ||
1188 | mlog_errno(ret); | ||
1189 | goto bail; | ||
1190 | } | ||
1191 | ocfs2_meta_unlock(inode, 0); | ||
1192 | |||
1193 | ret = generic_file_splice_read(in, ppos, pipe, len, flags); | ||
1194 | |||
1195 | bail: | ||
1196 | mlog_exit(ret); | ||
1197 | return ret; | ||
1198 | } | ||
1199 | |||
1115 | static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, | 1200 | static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, |
1116 | const struct iovec *iov, | 1201 | const struct iovec *iov, |
1117 | unsigned long nr_segs, | 1202 | unsigned long nr_segs, |
@@ -1210,6 +1295,8 @@ const struct file_operations ocfs2_fops = { | |||
1210 | .aio_read = ocfs2_file_aio_read, | 1295 | .aio_read = ocfs2_file_aio_read, |
1211 | .aio_write = ocfs2_file_aio_write, | 1296 | .aio_write = ocfs2_file_aio_write, |
1212 | .ioctl = ocfs2_ioctl, | 1297 | .ioctl = ocfs2_ioctl, |
1298 | .splice_read = ocfs2_file_splice_read, | ||
1299 | .splice_write = ocfs2_file_splice_write, | ||
1213 | }; | 1300 | }; |
1214 | 1301 | ||
1215 | const struct file_operations ocfs2_dops = { | 1302 | const struct file_operations ocfs2_dops = { |