diff options
author | Yehuda Sadeh <yehuda@hq.newdream.net> | 2010-02-09 14:02:51 -0500 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-02-11 14:48:50 -0500 |
commit | 4af6b2257ee0eb8f4bf3b1dc8acb643c0e8a887f (patch) | |
tree | 6193e59ebd7806d286e16e091a5acf98bceae424 /fs | |
parent | 972f0d3ab1a15cb5d790dd8c53903066084b28f2 (diff) |
ceph: refactor ceph_write_begin, fix ceph_page_mkwrite
Originally ceph_page_mkwrite called ceph_write_begin, hoping that
the returned locked page would be the page that it was requested
to mkwrite. Factor the relevant part of ceph_write_begin out into
ceph_update_writeable_page, so that ceph_page_mkwrite can lock the
right page itself and then call the helper directly.
Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/addr.c | 80 |
1 files changed, 51 insertions, 29 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 92f482150742..89c5ff3b59d5 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -907,15 +907,13 @@ static int context_is_writeable_or_written(struct inode *inode, | |||
907 | * We are only allowed to write into/dirty the page if the page is | 907 | * We are only allowed to write into/dirty the page if the page is |
908 | * clean, or already dirty within the same snap context. | 908 | * clean, or already dirty within the same snap context. |
909 | */ | 909 | */ |
910 | static int ceph_write_begin(struct file *file, struct address_space *mapping, | 910 | static int ceph_update_writeable_page(struct file *file, |
911 | loff_t pos, unsigned len, unsigned flags, | 911 | loff_t pos, unsigned len, |
912 | struct page **pagep, void **fsdata) | 912 | struct page *page) |
913 | { | 913 | { |
914 | struct inode *inode = file->f_dentry->d_inode; | 914 | struct inode *inode = file->f_dentry->d_inode; |
915 | struct ceph_inode_info *ci = ceph_inode(inode); | 915 | struct ceph_inode_info *ci = ceph_inode(inode); |
916 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 916 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; |
917 | struct page *page; | ||
918 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
919 | loff_t page_off = pos & PAGE_CACHE_MASK; | 917 | loff_t page_off = pos & PAGE_CACHE_MASK; |
920 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 918 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
921 | int end_in_page = pos_in_page + len; | 919 | int end_in_page = pos_in_page + len; |
@@ -923,16 +921,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, | |||
923 | struct ceph_snap_context *snapc; | 921 | struct ceph_snap_context *snapc; |
924 | int r; | 922 | int r; |
925 | 923 | ||
926 | /* get a page*/ | ||
927 | retry: | ||
928 | page = grab_cache_page_write_begin(mapping, index, 0); | ||
929 | if (!page) | ||
930 | return -ENOMEM; | ||
931 | *pagep = page; | ||
932 | |||
933 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | ||
934 | inode, page, (int)pos, (int)len); | ||
935 | |||
936 | retry_locked: | 924 | retry_locked: |
937 | /* writepages currently holds page lock, but if we change that later, */ | 925 | /* writepages currently holds page lock, but if we change that later, */ |
938 | wait_on_page_writeback(page); | 926 | wait_on_page_writeback(page); |
@@ -964,7 +952,7 @@ retry_locked: | |||
964 | wait_event_interruptible(ci->i_cap_wq, | 952 | wait_event_interruptible(ci->i_cap_wq, |
965 | context_is_writeable_or_written(inode, snapc)); | 953 | context_is_writeable_or_written(inode, snapc)); |
966 | ceph_put_snap_context(snapc); | 954 | ceph_put_snap_context(snapc); |
967 | goto retry; | 955 | return -EAGAIN; |
968 | } | 956 | } |
969 | 957 | ||
970 | /* yay, writeable, do it now (without dropping page lock) */ | 958 | /* yay, writeable, do it now (without dropping page lock) */ |
@@ -1022,6 +1010,35 @@ fail_nosnap: | |||
1022 | } | 1010 | } |
1023 | 1011 | ||
1024 | /* | 1012 | /* |
1013 | * We are only allowed to write into/dirty the page if the page is | ||
1014 | * clean, or already dirty within the same snap context. | ||
1015 | */ | ||
1016 | static int ceph_write_begin(struct file *file, struct address_space *mapping, | ||
1017 | loff_t pos, unsigned len, unsigned flags, | ||
1018 | struct page **pagep, void **fsdata) | ||
1019 | { | ||
1020 | struct inode *inode = file->f_dentry->d_inode; | ||
1021 | struct page *page; | ||
1022 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
1023 | int r; | ||
1024 | |||
1025 | do { | ||
1026 | /* get a page*/ | ||
1027 | page = grab_cache_page_write_begin(mapping, index, 0); | ||
1028 | if (!page) | ||
1029 | return -ENOMEM; | ||
1030 | *pagep = page; | ||
1031 | |||
1032 | dout("write_begin file %p inode %p page %p %d~%d\n", file, | ||
1033 | inode, page, (int)pos, (int)len); | ||
1034 | |||
1035 | r = ceph_update_writeable_page(file, pos, len, page); | ||
1036 | } while (r == -EAGAIN); | ||
1037 | |||
1038 | return r; | ||
1039 | } | ||
1040 | |||
1041 | /* | ||
1025 | * we don't do anything in here that simple_write_end doesn't do | 1042 | * we don't do anything in here that simple_write_end doesn't do |
1026 | * except adjust dirty page accounting and drop read lock on | 1043 | * except adjust dirty page accounting and drop read lock on |
1027 | * mdsc->snap_rwsem. | 1044 | * mdsc->snap_rwsem. |
@@ -1104,8 +1121,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1104 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1121 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; |
1105 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1122 | loff_t off = page->index << PAGE_CACHE_SHIFT; |
1106 | loff_t size, len; | 1123 | loff_t size, len; |
1107 | struct page *locked_page = NULL; | ||
1108 | void *fsdata = NULL; | ||
1109 | int ret; | 1124 | int ret; |
1110 | 1125 | ||
1111 | size = i_size_read(inode); | 1126 | size = i_size_read(inode); |
@@ -1116,23 +1131,30 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1116 | 1131 | ||
1117 | dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode, | 1132 | dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode, |
1118 | off, len, page, page->index); | 1133 | off, len, page, page->index); |
1119 | ret = ceph_write_begin(vma->vm_file, inode->i_mapping, off, len, 0, | 1134 | |
1120 | &locked_page, &fsdata); | 1135 | lock_page(page); |
1121 | WARN_ON(page != locked_page); | 1136 | |
1122 | if (!ret) { | 1137 | ret = VM_FAULT_NOPAGE; |
1123 | /* | 1138 | if ((off > size) || |
1124 | * doing the following, instead of calling | 1139 | (page->mapping != inode->i_mapping)) |
1125 | * ceph_write_end. Note that we keep the | 1140 | goto out; |
1126 | * page locked | 1141 | |
1127 | */ | 1142 | ret = ceph_update_writeable_page(vma->vm_file, off, len, page); |
1143 | if (ret == 0) { | ||
1144 | /* success. we'll keep the page locked. */ | ||
1128 | set_page_dirty(page); | 1145 | set_page_dirty(page); |
1129 | up_read(&mdsc->snap_rwsem); | 1146 | up_read(&mdsc->snap_rwsem); |
1130 | page_cache_release(page); | ||
1131 | ret = VM_FAULT_LOCKED; | 1147 | ret = VM_FAULT_LOCKED; |
1132 | } else { | 1148 | } else { |
1133 | ret = VM_FAULT_SIGBUS; | 1149 | if (ret == -ENOMEM) |
1150 | ret = VM_FAULT_OOM; | ||
1151 | else | ||
1152 | ret = VM_FAULT_SIGBUS; | ||
1134 | } | 1153 | } |
1154 | out: | ||
1135 | dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret); | 1155 | dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret); |
1156 | if (ret != VM_FAULT_LOCKED) | ||
1157 | unlock_page(page); | ||
1136 | return ret; | 1158 | return ret; |
1137 | } | 1159 | } |
1138 | 1160 | ||