aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tiger Yang <tiger.yang@oracle.com> 2006-10-17 21:29:52 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2006-12-01 21:28:46 -0500
commit 8659ac25b434fcc61cf7797f4b69edc3eaaffb55 (patch)
tree d839abc4ad988c23a5b34c1935742310f4b6cc7f
parent e88d0c9a4180821ad64c1fb421e4c28f8155eb74 (diff)
ocfs2: Add splice support
Add splice read/write support in ocfs2. ocfs2_file_splice_read/write are very similar to ocfs2_file_aio_read/write.

Signed-off-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- fs/ocfs2/file.c 187
1 files changed, 137 insertions, 50 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 39b2f1653e25..b32cdb3bf7c5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,6 +31,7 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/uio.h> 32#include <linux/uio.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/pipe_fs_i.h>
34 35
35#define MLOG_MASK_PREFIX ML_INODE 36#define MLOG_MASK_PREFIX ML_INODE
36#include <cluster/masklog.h> 37#include <cluster/masklog.h>
@@ -943,53 +944,21 @@ out:
943 return ret; 944 return ret;
944} 945}
945 946
946static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, 947static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
947 const struct iovec *iov, 948 loff_t *ppos,
948 unsigned long nr_segs, 949 size_t count,
949 loff_t pos) 950 int appending)
950{ 951{
951 int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; 952 int ret = 0, meta_level = appending;
953 struct inode *inode = dentry->d_inode;
952 u32 clusters; 954 u32 clusters;
953 struct file *filp = iocb->ki_filp;
954 struct inode *inode = filp->f_dentry->d_inode;
955 loff_t newsize, saved_pos; 955 loff_t newsize, saved_pos;
956 956
957 mlog_entry("(0x%p, %u, '%.*s')\n", filp,
958 (unsigned int)nr_segs,
959 filp->f_dentry->d_name.len,
960 filp->f_dentry->d_name.name);
961
962 /* happy write of zero bytes */
963 if (iocb->ki_left == 0)
964 return 0;
965
966 if (!inode) {
967 mlog(0, "bad inode\n");
968 return -EIO;
969 }
970
971 mutex_lock(&inode->i_mutex);
972 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
973 if (filp->f_flags & O_DIRECT) {
974 have_alloc_sem = 1;
975 down_read(&inode->i_alloc_sem);
976 }
977
978 /* concurrent O_DIRECT writes are allowed */
979 rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
980 ret = ocfs2_rw_lock(inode, rw_level);
981 if (ret < 0) {
982 rw_level = -1;
983 mlog_errno(ret);
984 goto out;
985 }
986
987 /* 957 /*
988 * We sample i_size under a read level meta lock to see if our write 958 * We sample i_size under a read level meta lock to see if our write
989 * is extending the file, if it is we back off and get a write level 959 * is extending the file, if it is we back off and get a write level
990 * meta lock. 960 * meta lock.
991 */ 961 */
992 meta_level = (filp->f_flags & O_APPEND) ? 1 : 0;
993 for(;;) { 962 for(;;) {
994 ret = ocfs2_meta_lock(inode, NULL, meta_level); 963 ret = ocfs2_meta_lock(inode, NULL, meta_level);
995 if (ret < 0) { 964 if (ret < 0) {
@@ -1007,7 +976,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1007 * inode. There's also the dinode i_size state which 976 * inode. There's also the dinode i_size state which
1008 * can be lost via setattr during extending writes (we 977 * can be lost via setattr during extending writes (we
1009 * set inode->i_size at the end of a write. */ 978 * set inode->i_size at the end of a write. */
1010 if (should_remove_suid(filp->f_dentry)) { 979 if (should_remove_suid(dentry)) {
1011 if (meta_level == 0) { 980 if (meta_level == 0) {
1012 ocfs2_meta_unlock(inode, meta_level); 981 ocfs2_meta_unlock(inode, meta_level);
1013 meta_level = 1; 982 meta_level = 1;
@@ -1017,19 +986,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1017 ret = ocfs2_write_remove_suid(inode); 986 ret = ocfs2_write_remove_suid(inode);
1018 if (ret < 0) { 987 if (ret < 0) {
1019 mlog_errno(ret); 988 mlog_errno(ret);
1020 goto out; 989 goto out_unlock;
1021 } 990 }
1022 } 991 }
1023 992
1024 /* work on a copy of ppos until we're sure that we won't have 993 /* work on a copy of ppos until we're sure that we won't have
1025 * to recalculate it due to relocking. */ 994 * to recalculate it due to relocking. */
1026 if (filp->f_flags & O_APPEND) { 995 if (appending) {
1027 saved_pos = i_size_read(inode); 996 saved_pos = i_size_read(inode);
1028 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); 997 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
1029 } else { 998 } else {
1030 saved_pos = iocb->ki_pos; 999 saved_pos = *ppos;
1031 } 1000 }
1032 newsize = iocb->ki_left + saved_pos; 1001 newsize = count + saved_pos;
1033 1002
1034 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", 1003 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1035 (long long) saved_pos, (long long) newsize, 1004 (long long) saved_pos, (long long) newsize,
@@ -1062,19 +1031,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1062 if (!clusters) 1031 if (!clusters)
1063 break; 1032 break;
1064 1033
1065 ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); 1034 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1066 if (ret < 0) { 1035 if (ret < 0) {
1067 if (ret != -ENOSPC) 1036 if (ret != -ENOSPC)
1068 mlog_errno(ret); 1037 mlog_errno(ret);
1069 goto out; 1038 goto out_unlock;
1070 } 1039 }
1071 break; 1040 break;
1072 } 1041 }
1073 1042
1074 /* ok, we're done with i_size and alloc work */ 1043 if (appending)
1075 iocb->ki_pos = saved_pos; 1044 *ppos = saved_pos;
1045
1046out_unlock:
1076 ocfs2_meta_unlock(inode, meta_level); 1047 ocfs2_meta_unlock(inode, meta_level);
1077 meta_level = -1; 1048
1049out:
1050 return ret;
1051}
1052
1053static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1054 const struct iovec *iov,
1055 unsigned long nr_segs,
1056 loff_t pos)
1057{
1058 int ret, rw_level, have_alloc_sem = 0;
1059 struct file *filp = iocb->ki_filp;
1060 struct inode *inode = filp->f_dentry->d_inode;
1061 int appending = filp->f_flags & O_APPEND ? 1 : 0;
1062
1063 mlog_entry("(0x%p, %u, '%.*s')\n", filp,
1064 (unsigned int)nr_segs,
1065 filp->f_dentry->d_name.len,
1066 filp->f_dentry->d_name.name);
1067
1068 /* happy write of zero bytes */
1069 if (iocb->ki_left == 0)
1070 return 0;
1071
1072 mutex_lock(&inode->i_mutex);
1073 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
1074 if (filp->f_flags & O_DIRECT) {
1075 have_alloc_sem = 1;
1076 down_read(&inode->i_alloc_sem);
1077 }
1078
1079 /* concurrent O_DIRECT writes are allowed */
1080 rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
1081 ret = ocfs2_rw_lock(inode, rw_level);
1082 if (ret < 0) {
1083 rw_level = -1;
1084 mlog_errno(ret);
1085 goto out;
1086 }
1087
1088 ret = ocfs2_prepare_inode_for_write(filp->f_dentry, &iocb->ki_pos,
1089 iocb->ki_left, appending);
1090 if (ret < 0) {
1091 mlog_errno(ret);
1092 goto out;
1093 }
1078 1094
1079 /* communicate with ocfs2_dio_end_io */ 1095 /* communicate with ocfs2_dio_end_io */
1080 ocfs2_iocb_set_rw_locked(iocb); 1096 ocfs2_iocb_set_rw_locked(iocb);
@@ -1100,8 +1116,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1100 } 1116 }
1101 1117
1102out: 1118out:
1103 if (meta_level != -1)
1104 ocfs2_meta_unlock(inode, meta_level);
1105 if (have_alloc_sem) 1119 if (have_alloc_sem)
1106 up_read(&inode->i_alloc_sem); 1120 up_read(&inode->i_alloc_sem);
1107 if (rw_level != -1) 1121 if (rw_level != -1)
@@ -1112,6 +1126,77 @@ out:
1112 return ret; 1126 return ret;
1113} 1127}
1114 1128
1129static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1130 struct file *out,
1131 loff_t *ppos,
1132 size_t len,
1133 unsigned int flags)
1134{
1135 int ret;
1136 struct inode *inode = out->f_dentry->d_inode;
1137
1138 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
1139 (unsigned int)len,
1140 out->f_dentry->d_name.len,
1141 out->f_dentry->d_name.name);
1142
1143 inode_double_lock(inode, pipe->inode);
1144
1145 ret = ocfs2_rw_lock(inode, 1);
1146 if (ret < 0) {
1147 mlog_errno(ret);
1148 goto out;
1149 }
1150
1151 ret = ocfs2_prepare_inode_for_write(out->f_dentry, ppos, len, 0);
1152 if (ret < 0) {
1153 mlog_errno(ret);
1154 goto out_unlock;
1155 }
1156
1157 /* ok, we're done with i_size and alloc work */
1158 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
1159
1160out_unlock:
1161 ocfs2_rw_unlock(inode, 1);
1162out:
1163 inode_double_unlock(inode, pipe->inode);
1164
1165 mlog_exit(ret);
1166 return ret;
1167}
1168
1169static ssize_t ocfs2_file_splice_read(struct file *in,
1170 loff_t *ppos,
1171 struct pipe_inode_info *pipe,
1172 size_t len,
1173 unsigned int flags)
1174{
1175 int ret = 0;
1176 struct inode *inode = in->f_dentry->d_inode;
1177
1178 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
1179 (unsigned int)len,
1180 in->f_dentry->d_name.len,
1181 in->f_dentry->d_name.name);
1182
1183 /*
1184 * See the comment in ocfs2_file_aio_read()
1185 */
1186 ret = ocfs2_meta_lock(inode, NULL, 0);
1187 if (ret < 0) {
1188 mlog_errno(ret);
1189 goto bail;
1190 }
1191 ocfs2_meta_unlock(inode, 0);
1192
1193 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
1194
1195bail:
1196 mlog_exit(ret);
1197 return ret;
1198}
1199
1115static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, 1200static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
1116 const struct iovec *iov, 1201 const struct iovec *iov,
1117 unsigned long nr_segs, 1202 unsigned long nr_segs,
@@ -1210,6 +1295,8 @@ const struct file_operations ocfs2_fops = {
1210 .aio_read = ocfs2_file_aio_read, 1295 .aio_read = ocfs2_file_aio_read,
1211 .aio_write = ocfs2_file_aio_write, 1296 .aio_write = ocfs2_file_aio_write,
1212 .ioctl = ocfs2_ioctl, 1297 .ioctl = ocfs2_ioctl,
1298 .splice_read = ocfs2_file_splice_read,
1299 .splice_write = ocfs2_file_splice_write,
1213}; 1300};
1214 1301
1215const struct file_operations ocfs2_dops = { 1302const struct file_operations ocfs2_dops = {