aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r--fs/ocfs2/file.c372
1 files changed, 262 insertions, 110 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1be74c4e7814..10953a508f2f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,6 +31,8 @@
31#include <linux/pagemap.h> 31#include <linux/pagemap.h>
32#include <linux/uio.h> 32#include <linux/uio.h>
33#include <linux/sched.h> 33#include <linux/sched.h>
34#include <linux/pipe_fs_i.h>
35#include <linux/mount.h>
34 36
35#define MLOG_MASK_PREFIX ML_INODE 37#define MLOG_MASK_PREFIX ML_INODE
36#include <cluster/masklog.h> 38#include <cluster/masklog.h>
@@ -66,7 +68,7 @@ static int ocfs2_file_open(struct inode *inode, struct file *file)
66 struct ocfs2_inode_info *oi = OCFS2_I(inode); 68 struct ocfs2_inode_info *oi = OCFS2_I(inode);
67 69
68 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 70 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
69 file->f_dentry->d_name.len, file->f_dentry->d_name.name); 71 file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
70 72
71 spin_lock(&oi->ip_lock); 73 spin_lock(&oi->ip_lock);
72 74
@@ -96,8 +98,8 @@ static int ocfs2_file_release(struct inode *inode, struct file *file)
96 struct ocfs2_inode_info *oi = OCFS2_I(inode); 98 struct ocfs2_inode_info *oi = OCFS2_I(inode);
97 99
98 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, 100 mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
99 file->f_dentry->d_name.len, 101 file->f_path.dentry->d_name.len,
100 file->f_dentry->d_name.name); 102 file->f_path.dentry->d_name.name);
101 103
102 spin_lock(&oi->ip_lock); 104 spin_lock(&oi->ip_lock);
103 if (!--oi->ip_open_count) 105 if (!--oi->ip_open_count)
@@ -134,7 +136,77 @@ bail:
134 return (err < 0) ? -EIO : 0; 136 return (err < 0) ? -EIO : 0;
135} 137}
136 138
137int ocfs2_set_inode_size(struct ocfs2_journal_handle *handle, 139int ocfs2_should_update_atime(struct inode *inode,
140 struct vfsmount *vfsmnt)
141{
142 struct timespec now;
143 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
144
145 if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
146 return 0;
147
148 if ((inode->i_flags & S_NOATIME) ||
149 ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
150 return 0;
151
152 /*
153 * We can be called with no vfsmnt structure - NFSD will
154 * sometimes do this.
155 *
156 * Note that our action here is different than touch_atime() -
157 * if we can't tell whether this is a noatime mount, then we
158 * don't know whether to trust the value of s_atime_quantum.
159 */
160 if (vfsmnt == NULL)
161 return 0;
162
163 if ((vfsmnt->mnt_flags & MNT_NOATIME) ||
164 ((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
165 return 0;
166
167 if (vfsmnt->mnt_flags & MNT_RELATIME) {
168 if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
169 (timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
170 return 1;
171
172 return 0;
173 }
174
175 now = CURRENT_TIME;
176 if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
177 return 0;
178 else
179 return 1;
180}
181
182int ocfs2_update_inode_atime(struct inode *inode,
183 struct buffer_head *bh)
184{
185 int ret;
186 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
187 handle_t *handle;
188
189 mlog_entry_void();
190
191 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
192 if (handle == NULL) {
193 ret = -ENOMEM;
194 mlog_errno(ret);
195 goto out;
196 }
197
198 inode->i_atime = CURRENT_TIME;
199 ret = ocfs2_mark_inode_dirty(handle, inode, bh);
200 if (ret < 0)
201 mlog_errno(ret);
202
203 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
204out:
205 mlog_exit(ret);
206 return ret;
207}
208
209int ocfs2_set_inode_size(handle_t *handle,
138 struct inode *inode, 210 struct inode *inode,
139 struct buffer_head *fe_bh, 211 struct buffer_head *fe_bh,
140 u64 new_i_size) 212 u64 new_i_size)
@@ -163,10 +235,9 @@ static int ocfs2_simple_size_update(struct inode *inode,
163{ 235{
164 int ret; 236 int ret;
165 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
166 struct ocfs2_journal_handle *handle = NULL; 238 handle_t *handle = NULL;
167 239
168 handle = ocfs2_start_trans(osb, NULL, 240 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
169 OCFS2_INODE_UPDATE_CREDITS);
170 if (handle == NULL) { 241 if (handle == NULL) {
171 ret = -ENOMEM; 242 ret = -ENOMEM;
172 mlog_errno(ret); 243 mlog_errno(ret);
@@ -178,7 +249,7 @@ static int ocfs2_simple_size_update(struct inode *inode,
178 if (ret < 0) 249 if (ret < 0)
179 mlog_errno(ret); 250 mlog_errno(ret);
180 251
181 ocfs2_commit_trans(handle); 252 ocfs2_commit_trans(osb, handle);
182out: 253out:
183 return ret; 254 return ret;
184} 255}
@@ -189,14 +260,14 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
189 u64 new_i_size) 260 u64 new_i_size)
190{ 261{
191 int status; 262 int status;
192 struct ocfs2_journal_handle *handle; 263 handle_t *handle;
193 264
194 mlog_entry_void(); 265 mlog_entry_void();
195 266
196 /* TODO: This needs to actually orphan the inode in this 267 /* TODO: This needs to actually orphan the inode in this
197 * transaction. */ 268 * transaction. */
198 269
199 handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); 270 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
200 if (IS_ERR(handle)) { 271 if (IS_ERR(handle)) {
201 status = PTR_ERR(handle); 272 status = PTR_ERR(handle);
202 mlog_errno(status); 273 mlog_errno(status);
@@ -207,7 +278,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
207 if (status < 0) 278 if (status < 0)
208 mlog_errno(status); 279 mlog_errno(status);
209 280
210 ocfs2_commit_trans(handle); 281 ocfs2_commit_trans(osb, handle);
211out: 282out:
212 mlog_exit(status); 283 mlog_exit(status);
213 return status; 284 return status;
@@ -328,7 +399,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
328 struct inode *inode, 399 struct inode *inode,
329 u32 clusters_to_add, 400 u32 clusters_to_add,
330 struct buffer_head *fe_bh, 401 struct buffer_head *fe_bh,
331 struct ocfs2_journal_handle *handle, 402 handle_t *handle,
332 struct ocfs2_alloc_context *data_ac, 403 struct ocfs2_alloc_context *data_ac,
333 struct ocfs2_alloc_context *meta_ac, 404 struct ocfs2_alloc_context *meta_ac,
334 enum ocfs2_alloc_restarted *reason_ret) 405 enum ocfs2_alloc_restarted *reason_ret)
@@ -433,7 +504,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
433 u32 prev_clusters; 504 u32 prev_clusters;
434 struct buffer_head *bh = NULL; 505 struct buffer_head *bh = NULL;
435 struct ocfs2_dinode *fe = NULL; 506 struct ocfs2_dinode *fe = NULL;
436 struct ocfs2_journal_handle *handle = NULL; 507 handle_t *handle = NULL;
437 struct ocfs2_alloc_context *data_ac = NULL; 508 struct ocfs2_alloc_context *data_ac = NULL;
438 struct ocfs2_alloc_context *meta_ac = NULL; 509 struct ocfs2_alloc_context *meta_ac = NULL;
439 enum ocfs2_alloc_restarted why; 510 enum ocfs2_alloc_restarted why;
@@ -463,13 +534,6 @@ restart_all:
463 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), 534 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
464 fe->i_clusters, clusters_to_add); 535 fe->i_clusters, clusters_to_add);
465 536
466 handle = ocfs2_alloc_handle(osb);
467 if (handle == NULL) {
468 status = -ENOMEM;
469 mlog_errno(status);
470 goto leave;
471 }
472
473 num_free_extents = ocfs2_num_free_extents(osb, 537 num_free_extents = ocfs2_num_free_extents(osb,
474 inode, 538 inode,
475 fe); 539 fe);
@@ -480,10 +544,7 @@ restart_all:
480 } 544 }
481 545
482 if (!num_free_extents) { 546 if (!num_free_extents) {
483 status = ocfs2_reserve_new_metadata(osb, 547 status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac);
484 handle,
485 fe,
486 &meta_ac);
487 if (status < 0) { 548 if (status < 0) {
488 if (status != -ENOSPC) 549 if (status != -ENOSPC)
489 mlog_errno(status); 550 mlog_errno(status);
@@ -491,10 +552,7 @@ restart_all:
491 } 552 }
492 } 553 }
493 554
494 status = ocfs2_reserve_clusters(osb, 555 status = ocfs2_reserve_clusters(osb, clusters_to_add, &data_ac);
495 handle,
496 clusters_to_add,
497 &data_ac);
498 if (status < 0) { 556 if (status < 0) {
499 if (status != -ENOSPC) 557 if (status != -ENOSPC)
500 mlog_errno(status); 558 mlog_errno(status);
@@ -509,7 +567,7 @@ restart_all:
509 drop_alloc_sem = 1; 567 drop_alloc_sem = 1;
510 568
511 credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); 569 credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add);
512 handle = ocfs2_start_trans(osb, handle, credits); 570 handle = ocfs2_start_trans(osb, credits);
513 if (IS_ERR(handle)) { 571 if (IS_ERR(handle)) {
514 status = PTR_ERR(handle); 572 status = PTR_ERR(handle);
515 handle = NULL; 573 handle = NULL;
@@ -589,7 +647,7 @@ leave:
589 drop_alloc_sem = 0; 647 drop_alloc_sem = 0;
590 } 648 }
591 if (handle) { 649 if (handle) {
592 ocfs2_commit_trans(handle); 650 ocfs2_commit_trans(osb, handle);
593 handle = NULL; 651 handle = NULL;
594 } 652 }
595 if (data_ac) { 653 if (data_ac) {
@@ -624,7 +682,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
624 struct page *page; 682 struct page *page;
625 unsigned long index; 683 unsigned long index;
626 unsigned int offset; 684 unsigned int offset;
627 struct ocfs2_journal_handle *handle = NULL; 685 handle_t *handle = NULL;
628 int ret; 686 int ret;
629 687
630 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 688 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
@@ -668,7 +726,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
668 ret = 0; 726 ret = 0;
669 727
670 if (handle) 728 if (handle)
671 ocfs2_commit_trans(handle); 729 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
672out_unlock: 730out_unlock:
673 unlock_page(page); 731 unlock_page(page);
674 page_cache_release(page); 732 page_cache_release(page);
@@ -789,7 +847,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
789 struct super_block *sb = inode->i_sb; 847 struct super_block *sb = inode->i_sb;
790 struct ocfs2_super *osb = OCFS2_SB(sb); 848 struct ocfs2_super *osb = OCFS2_SB(sb);
791 struct buffer_head *bh = NULL; 849 struct buffer_head *bh = NULL;
792 struct ocfs2_journal_handle *handle = NULL; 850 handle_t *handle = NULL;
793 851
794 mlog_entry("(0x%p, '%.*s')\n", dentry, 852 mlog_entry("(0x%p, '%.*s')\n", dentry,
795 dentry->d_name.len, dentry->d_name.name); 853 dentry->d_name.len, dentry->d_name.name);
@@ -825,7 +883,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
825 } 883 }
826 } 884 }
827 885
828 status = ocfs2_meta_lock(inode, NULL, &bh, 1); 886 status = ocfs2_meta_lock(inode, &bh, 1);
829 if (status < 0) { 887 if (status < 0) {
830 if (status != -ENOENT) 888 if (status != -ENOENT)
831 mlog_errno(status); 889 mlog_errno(status);
@@ -845,7 +903,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
845 } 903 }
846 } 904 }
847 905
848 handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); 906 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
849 if (IS_ERR(handle)) { 907 if (IS_ERR(handle)) {
850 status = PTR_ERR(handle); 908 status = PTR_ERR(handle);
851 mlog_errno(status); 909 mlog_errno(status);
@@ -863,7 +921,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
863 mlog_errno(status); 921 mlog_errno(status);
864 922
865bail_commit: 923bail_commit:
866 ocfs2_commit_trans(handle); 924 ocfs2_commit_trans(osb, handle);
867bail_unlock: 925bail_unlock:
868 ocfs2_meta_unlock(inode, 1); 926 ocfs2_meta_unlock(inode, 1);
869bail_unlock_rw: 927bail_unlock_rw:
@@ -906,19 +964,39 @@ bail:
906 return err; 964 return err;
907} 965}
908 966
967int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd)
968{
969 int ret;
970
971 mlog_entry_void();
972
973 ret = ocfs2_meta_lock(inode, NULL, 0);
974 if (ret) {
975 mlog_errno(ret);
976 goto out;
977 }
978
979 ret = generic_permission(inode, mask, NULL);
980
981 ocfs2_meta_unlock(inode, 0);
982out:
983 mlog_exit(ret);
984 return ret;
985}
986
909static int ocfs2_write_remove_suid(struct inode *inode) 987static int ocfs2_write_remove_suid(struct inode *inode)
910{ 988{
911 int ret; 989 int ret;
912 struct buffer_head *bh = NULL; 990 struct buffer_head *bh = NULL;
913 struct ocfs2_inode_info *oi = OCFS2_I(inode); 991 struct ocfs2_inode_info *oi = OCFS2_I(inode);
914 struct ocfs2_journal_handle *handle; 992 handle_t *handle;
915 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 993 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
916 struct ocfs2_dinode *di; 994 struct ocfs2_dinode *di;
917 995
918 mlog_entry("(Inode %llu, mode 0%o)\n", 996 mlog_entry("(Inode %llu, mode 0%o)\n",
919 (unsigned long long)oi->ip_blkno, inode->i_mode); 997 (unsigned long long)oi->ip_blkno, inode->i_mode);
920 998
921 handle = ocfs2_start_trans(osb, NULL, OCFS2_INODE_UPDATE_CREDITS); 999 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
922 if (handle == NULL) { 1000 if (handle == NULL) {
923 ret = -ENOMEM; 1001 ret = -ENOMEM;
924 mlog_errno(ret); 1002 mlog_errno(ret);
@@ -951,75 +1029,29 @@ static int ocfs2_write_remove_suid(struct inode *inode)
951out_bh: 1029out_bh:
952 brelse(bh); 1030 brelse(bh);
953out_trans: 1031out_trans:
954 ocfs2_commit_trans(handle); 1032 ocfs2_commit_trans(osb, handle);
955out: 1033out:
956 mlog_exit(ret); 1034 mlog_exit(ret);
957 return ret; 1035 return ret;
958} 1036}
959 1037
960static inline int ocfs2_write_should_remove_suid(struct inode *inode) 1038static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
961{ 1039 loff_t *ppos,
962 mode_t mode = inode->i_mode; 1040 size_t count,
963 1041 int appending)
964 if (!capable(CAP_FSETID)) {
965 if (unlikely(mode & S_ISUID))
966 return 1;
967
968 if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
969 return 1;
970 }
971 return 0;
972}
973
974static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
975 const struct iovec *iov,
976 unsigned long nr_segs,
977 loff_t pos)
978{ 1042{
979 int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; 1043 int ret = 0, meta_level = appending;
1044 struct inode *inode = dentry->d_inode;
980 u32 clusters; 1045 u32 clusters;
981 struct file *filp = iocb->ki_filp;
982 struct inode *inode = filp->f_dentry->d_inode;
983 loff_t newsize, saved_pos; 1046 loff_t newsize, saved_pos;
984 1047
985 mlog_entry("(0x%p, %u, '%.*s')\n", filp,
986 (unsigned int)nr_segs,
987 filp->f_dentry->d_name.len,
988 filp->f_dentry->d_name.name);
989
990 /* happy write of zero bytes */
991 if (iocb->ki_left == 0)
992 return 0;
993
994 if (!inode) {
995 mlog(0, "bad inode\n");
996 return -EIO;
997 }
998
999 mutex_lock(&inode->i_mutex);
1000 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
1001 if (filp->f_flags & O_DIRECT) {
1002 have_alloc_sem = 1;
1003 down_read(&inode->i_alloc_sem);
1004 }
1005
1006 /* concurrent O_DIRECT writes are allowed */
1007 rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
1008 ret = ocfs2_rw_lock(inode, rw_level);
1009 if (ret < 0) {
1010 rw_level = -1;
1011 mlog_errno(ret);
1012 goto out;
1013 }
1014
1015 /* 1048 /*
1016 * We sample i_size under a read level meta lock to see if our write 1049 * We sample i_size under a read level meta lock to see if our write
1017 * is extending the file, if it is we back off and get a write level 1050 * is extending the file, if it is we back off and get a write level
1018 * meta lock. 1051 * meta lock.
1019 */ 1052 */
1020 meta_level = (filp->f_flags & O_APPEND) ? 1 : 0;
1021 for(;;) { 1053 for(;;) {
1022 ret = ocfs2_meta_lock(inode, NULL, NULL, meta_level); 1054 ret = ocfs2_meta_lock(inode, NULL, meta_level);
1023 if (ret < 0) { 1055 if (ret < 0) {
1024 meta_level = -1; 1056 meta_level = -1;
1025 mlog_errno(ret); 1057 mlog_errno(ret);
@@ -1035,7 +1067,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1035 * inode. There's also the dinode i_size state which 1067 * inode. There's also the dinode i_size state which
1036 * can be lost via setattr during extending writes (we 1068 * can be lost via setattr during extending writes (we
1037 * set inode->i_size at the end of a write. */ 1069 * set inode->i_size at the end of a write. */
1038 if (ocfs2_write_should_remove_suid(inode)) { 1070 if (should_remove_suid(dentry)) {
1039 if (meta_level == 0) { 1071 if (meta_level == 0) {
1040 ocfs2_meta_unlock(inode, meta_level); 1072 ocfs2_meta_unlock(inode, meta_level);
1041 meta_level = 1; 1073 meta_level = 1;
@@ -1045,19 +1077,19 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1045 ret = ocfs2_write_remove_suid(inode); 1077 ret = ocfs2_write_remove_suid(inode);
1046 if (ret < 0) { 1078 if (ret < 0) {
1047 mlog_errno(ret); 1079 mlog_errno(ret);
1048 goto out; 1080 goto out_unlock;
1049 } 1081 }
1050 } 1082 }
1051 1083
1052 /* work on a copy of ppos until we're sure that we won't have 1084 /* work on a copy of ppos until we're sure that we won't have
1053 * to recalculate it due to relocking. */ 1085 * to recalculate it due to relocking. */
1054 if (filp->f_flags & O_APPEND) { 1086 if (appending) {
1055 saved_pos = i_size_read(inode); 1087 saved_pos = i_size_read(inode);
1056 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); 1088 mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
1057 } else { 1089 } else {
1058 saved_pos = iocb->ki_pos; 1090 saved_pos = *ppos;
1059 } 1091 }
1060 newsize = iocb->ki_left + saved_pos; 1092 newsize = count + saved_pos;
1061 1093
1062 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", 1094 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1063 (long long) saved_pos, (long long) newsize, 1095 (long long) saved_pos, (long long) newsize,
@@ -1090,19 +1122,66 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1090 if (!clusters) 1122 if (!clusters)
1091 break; 1123 break;
1092 1124
1093 ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); 1125 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1094 if (ret < 0) { 1126 if (ret < 0) {
1095 if (ret != -ENOSPC) 1127 if (ret != -ENOSPC)
1096 mlog_errno(ret); 1128 mlog_errno(ret);
1097 goto out; 1129 goto out_unlock;
1098 } 1130 }
1099 break; 1131 break;
1100 } 1132 }
1101 1133
1102 /* ok, we're done with i_size and alloc work */ 1134 if (appending)
1103 iocb->ki_pos = saved_pos; 1135 *ppos = saved_pos;
1136
1137out_unlock:
1104 ocfs2_meta_unlock(inode, meta_level); 1138 ocfs2_meta_unlock(inode, meta_level);
1105 meta_level = -1; 1139
1140out:
1141 return ret;
1142}
1143
1144static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1145 const struct iovec *iov,
1146 unsigned long nr_segs,
1147 loff_t pos)
1148{
1149 int ret, rw_level, have_alloc_sem = 0;
1150 struct file *filp = iocb->ki_filp;
1151 struct inode *inode = filp->f_path.dentry->d_inode;
1152 int appending = filp->f_flags & O_APPEND ? 1 : 0;
1153
1154 mlog_entry("(0x%p, %u, '%.*s')\n", filp,
1155 (unsigned int)nr_segs,
1156 filp->f_path.dentry->d_name.len,
1157 filp->f_path.dentry->d_name.name);
1158
1159 /* happy write of zero bytes */
1160 if (iocb->ki_left == 0)
1161 return 0;
1162
1163 mutex_lock(&inode->i_mutex);
1164 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
1165 if (filp->f_flags & O_DIRECT) {
1166 have_alloc_sem = 1;
1167 down_read(&inode->i_alloc_sem);
1168 }
1169
1170 /* concurrent O_DIRECT writes are allowed */
1171 rw_level = (filp->f_flags & O_DIRECT) ? 0 : 1;
1172 ret = ocfs2_rw_lock(inode, rw_level);
1173 if (ret < 0) {
1174 rw_level = -1;
1175 mlog_errno(ret);
1176 goto out;
1177 }
1178
1179 ret = ocfs2_prepare_inode_for_write(filp->f_path.dentry, &iocb->ki_pos,
1180 iocb->ki_left, appending);
1181 if (ret < 0) {
1182 mlog_errno(ret);
1183 goto out;
1184 }
1106 1185
1107 /* communicate with ocfs2_dio_end_io */ 1186 /* communicate with ocfs2_dio_end_io */
1108 ocfs2_iocb_set_rw_locked(iocb); 1187 ocfs2_iocb_set_rw_locked(iocb);
@@ -1128,8 +1207,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
1128 } 1207 }
1129 1208
1130out: 1209out:
1131 if (meta_level != -1)
1132 ocfs2_meta_unlock(inode, meta_level);
1133 if (have_alloc_sem) 1210 if (have_alloc_sem)
1134 up_read(&inode->i_alloc_sem); 1211 up_read(&inode->i_alloc_sem);
1135 if (rw_level != -1) 1212 if (rw_level != -1)
@@ -1140,19 +1217,90 @@ out:
1140 return ret; 1217 return ret;
1141} 1218}
1142 1219
1220static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1221 struct file *out,
1222 loff_t *ppos,
1223 size_t len,
1224 unsigned int flags)
1225{
1226 int ret;
1227 struct inode *inode = out->f_path.dentry->d_inode;
1228
1229 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
1230 (unsigned int)len,
1231 out->f_path.dentry->d_name.len,
1232 out->f_path.dentry->d_name.name);
1233
1234 inode_double_lock(inode, pipe->inode);
1235
1236 ret = ocfs2_rw_lock(inode, 1);
1237 if (ret < 0) {
1238 mlog_errno(ret);
1239 goto out;
1240 }
1241
1242 ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0);
1243 if (ret < 0) {
1244 mlog_errno(ret);
1245 goto out_unlock;
1246 }
1247
1248 /* ok, we're done with i_size and alloc work */
1249 ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags);
1250
1251out_unlock:
1252 ocfs2_rw_unlock(inode, 1);
1253out:
1254 inode_double_unlock(inode, pipe->inode);
1255
1256 mlog_exit(ret);
1257 return ret;
1258}
1259
1260static ssize_t ocfs2_file_splice_read(struct file *in,
1261 loff_t *ppos,
1262 struct pipe_inode_info *pipe,
1263 size_t len,
1264 unsigned int flags)
1265{
1266 int ret = 0;
1267 struct inode *inode = in->f_path.dentry->d_inode;
1268
1269 mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
1270 (unsigned int)len,
1271 in->f_path.dentry->d_name.len,
1272 in->f_path.dentry->d_name.name);
1273
1274 /*
1275 * See the comment in ocfs2_file_aio_read()
1276 */
1277 ret = ocfs2_meta_lock(inode, NULL, 0);
1278 if (ret < 0) {
1279 mlog_errno(ret);
1280 goto bail;
1281 }
1282 ocfs2_meta_unlock(inode, 0);
1283
1284 ret = generic_file_splice_read(in, ppos, pipe, len, flags);
1285
1286bail:
1287 mlog_exit(ret);
1288 return ret;
1289}
1290
1143static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, 1291static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
1144 const struct iovec *iov, 1292 const struct iovec *iov,
1145 unsigned long nr_segs, 1293 unsigned long nr_segs,
1146 loff_t pos) 1294 loff_t pos)
1147{ 1295{
1148 int ret = 0, rw_level = -1, have_alloc_sem = 0; 1296 int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
1149 struct file *filp = iocb->ki_filp; 1297 struct file *filp = iocb->ki_filp;
1150 struct inode *inode = filp->f_dentry->d_inode; 1298 struct inode *inode = filp->f_path.dentry->d_inode;
1151 1299
1152 mlog_entry("(0x%p, %u, '%.*s')\n", filp, 1300 mlog_entry("(0x%p, %u, '%.*s')\n", filp,
1153 (unsigned int)nr_segs, 1301 (unsigned int)nr_segs,
1154 filp->f_dentry->d_name.len, 1302 filp->f_path.dentry->d_name.len,
1155 filp->f_dentry->d_name.name); 1303 filp->f_path.dentry->d_name.name);
1156 1304
1157 if (!inode) { 1305 if (!inode) {
1158 ret = -EINVAL; 1306 ret = -EINVAL;
@@ -1187,12 +1335,12 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
1187 * like i_size. This allows the checks down below 1335 * like i_size. This allows the checks down below
1188 * generic_file_aio_read() a chance of actually working. 1336 * generic_file_aio_read() a chance of actually working.
1189 */ 1337 */
1190 ret = ocfs2_meta_lock(inode, NULL, NULL, 0); 1338 ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
1191 if (ret < 0) { 1339 if (ret < 0) {
1192 mlog_errno(ret); 1340 mlog_errno(ret);
1193 goto bail; 1341 goto bail;
1194 } 1342 }
1195 ocfs2_meta_unlock(inode, 0); 1343 ocfs2_meta_unlock(inode, lock_level);
1196 1344
1197 ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); 1345 ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
1198 if (ret == -EINVAL) 1346 if (ret == -EINVAL)
@@ -1220,11 +1368,13 @@ bail:
1220struct inode_operations ocfs2_file_iops = { 1368struct inode_operations ocfs2_file_iops = {
1221 .setattr = ocfs2_setattr, 1369 .setattr = ocfs2_setattr,
1222 .getattr = ocfs2_getattr, 1370 .getattr = ocfs2_getattr,
1371 .permission = ocfs2_permission,
1223}; 1372};
1224 1373
1225struct inode_operations ocfs2_special_file_iops = { 1374struct inode_operations ocfs2_special_file_iops = {
1226 .setattr = ocfs2_setattr, 1375 .setattr = ocfs2_setattr,
1227 .getattr = ocfs2_getattr, 1376 .getattr = ocfs2_getattr,
1377 .permission = ocfs2_permission,
1228}; 1378};
1229 1379
1230const struct file_operations ocfs2_fops = { 1380const struct file_operations ocfs2_fops = {
@@ -1238,6 +1388,8 @@ const struct file_operations ocfs2_fops = {
1238 .aio_read = ocfs2_file_aio_read, 1388 .aio_read = ocfs2_file_aio_read,
1239 .aio_write = ocfs2_file_aio_write, 1389 .aio_write = ocfs2_file_aio_write,
1240 .ioctl = ocfs2_ioctl, 1390 .ioctl = ocfs2_ioctl,
1391 .splice_read = ocfs2_file_splice_read,
1392 .splice_write = ocfs2_file_splice_write,
1241}; 1393};
1242 1394
1243const struct file_operations ocfs2_dops = { 1395const struct file_operations ocfs2_dops = {