aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2014-11-14 09:10:07 -0500
committerIlya Dryomov <idryomov@redhat.com>2014-12-17 12:09:52 -0500
commit3738daa68a5121ad7dd0318bca931e2a6afb0e8c (patch)
tree8cb3a27c974fa834c2241e9f7335be405053492b /fs/ceph
parent01deead041e03c9a6b4e1b2dd165dee4cced6112 (diff)
ceph: fetch inline data when getting Fcr cap refs
We can't use getattr to fetch inline data after getting Fcr caps, because it can cause a deadlock. The solution is to try bringing the inline data into the page cache while not holding any caps, and hope the inline data page is still there after getting the Fcr caps. If the page is still there, pin it in the page cache for later IO. Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/addr.c9
-rw-r--r--fs/ceph/caps.c60
-rw-r--r--fs/ceph/file.c12
3 files changed, 63 insertions, 18 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4a3f55f27ab4..5d2b88e3ff0b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1207,6 +1207,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1207 struct inode *inode = file_inode(vma->vm_file); 1207 struct inode *inode = file_inode(vma->vm_file);
1208 struct ceph_inode_info *ci = ceph_inode(inode); 1208 struct ceph_inode_info *ci = ceph_inode(inode);
1209 struct ceph_file_info *fi = vma->vm_file->private_data; 1209 struct ceph_file_info *fi = vma->vm_file->private_data;
1210 struct page *pinned_page = NULL;
1210 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT; 1211 loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
1211 int want, got, ret; 1212 int want, got, ret;
1212 1213
@@ -1218,7 +1219,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1218 want = CEPH_CAP_FILE_CACHE; 1219 want = CEPH_CAP_FILE_CACHE;
1219 while (1) { 1220 while (1) {
1220 got = 0; 1221 got = 0;
1221 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 1222 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1,
1223 &got, &pinned_page);
1222 if (ret == 0) 1224 if (ret == 0)
1223 break; 1225 break;
1224 if (ret != -ERESTARTSYS) { 1226 if (ret != -ERESTARTSYS) {
@@ -1233,6 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1233 1235
1234 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n", 1236 dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
1235 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret); 1237 inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
1238 if (pinned_page)
1239 page_cache_release(pinned_page);
1236 ceph_put_cap_refs(ci, got); 1240 ceph_put_cap_refs(ci, got);
1237 1241
1238 return ret; 1242 return ret;
@@ -1266,7 +1270,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1266 want = CEPH_CAP_FILE_BUFFER; 1270 want = CEPH_CAP_FILE_BUFFER;
1267 while (1) { 1271 while (1) {
1268 got = 0; 1272 got = 0;
1269 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len); 1273 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
1274 &got, NULL);
1270 if (ret == 0) 1275 if (ret == 0)
1271 break; 1276 break;
1272 if (ret != -ERESTARTSYS) { 1277 if (ret != -ERESTARTSYS) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6372eb9ce491..795afe304871 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2057,15 +2057,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
2057 * requested from the MDS. 2057 * requested from the MDS.
2058 */ 2058 */
2059static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, 2059static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2060 int *got, loff_t endoff, int *check_max, int *err) 2060 loff_t endoff, int *got, struct page **pinned_page,
2061 int *check_max, int *err)
2061{ 2062{
2062 struct inode *inode = &ci->vfs_inode; 2063 struct inode *inode = &ci->vfs_inode;
2063 int ret = 0; 2064 int ret = 0;
2064 int have, implemented; 2065 int have, implemented, _got = 0;
2065 int file_wanted; 2066 int file_wanted;
2066 2067
2067 dout("get_cap_refs %p need %s want %s\n", inode, 2068 dout("get_cap_refs %p need %s want %s\n", inode,
2068 ceph_cap_string(need), ceph_cap_string(want)); 2069 ceph_cap_string(need), ceph_cap_string(want));
2070again:
2069 spin_lock(&ci->i_ceph_lock); 2071 spin_lock(&ci->i_ceph_lock);
2070 2072
2071 /* make sure file is actually open */ 2073 /* make sure file is actually open */
@@ -2075,7 +2077,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2075 ceph_cap_string(need), ceph_cap_string(file_wanted)); 2077 ceph_cap_string(need), ceph_cap_string(file_wanted));
2076 *err = -EBADF; 2078 *err = -EBADF;
2077 ret = 1; 2079 ret = 1;
2078 goto out; 2080 goto out_unlock;
2079 } 2081 }
2080 2082
2081 /* finish pending truncate */ 2083 /* finish pending truncate */
@@ -2095,7 +2097,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2095 *check_max = 1; 2097 *check_max = 1;
2096 ret = 1; 2098 ret = 1;
2097 } 2099 }
2098 goto out; 2100 goto out_unlock;
2099 } 2101 }
2100 /* 2102 /*
2101 * If a sync write is in progress, we must wait, so that we 2103 * If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2105,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2103 */ 2105 */
2104 if (__ceph_have_pending_cap_snap(ci)) { 2106 if (__ceph_have_pending_cap_snap(ci)) {
2105 dout("get_cap_refs %p cap_snap_pending\n", inode); 2107 dout("get_cap_refs %p cap_snap_pending\n", inode);
2106 goto out; 2108 goto out_unlock;
2107 } 2109 }
2108 } 2110 }
2109 2111
@@ -2120,18 +2122,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2120 inode, ceph_cap_string(have), ceph_cap_string(not), 2122 inode, ceph_cap_string(have), ceph_cap_string(not),
2121 ceph_cap_string(revoking)); 2123 ceph_cap_string(revoking));
2122 if ((revoking & not) == 0) { 2124 if ((revoking & not) == 0) {
2123 *got = need | (have & want); 2125 _got = need | (have & want);
2124 __take_cap_refs(ci, *got); 2126 __take_cap_refs(ci, _got);
2125 ret = 1; 2127 ret = 1;
2126 } 2128 }
2127 } else { 2129 } else {
2128 dout("get_cap_refs %p have %s needed %s\n", inode, 2130 dout("get_cap_refs %p have %s needed %s\n", inode,
2129 ceph_cap_string(have), ceph_cap_string(need)); 2131 ceph_cap_string(have), ceph_cap_string(need));
2130 } 2132 }
2131out: 2133out_unlock:
2132 spin_unlock(&ci->i_ceph_lock); 2134 spin_unlock(&ci->i_ceph_lock);
2135
2136 if (ci->i_inline_version != CEPH_INLINE_NONE &&
2137 (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
2138 i_size_read(inode) > 0) {
2139 int ret1;
2140 struct page *page = find_get_page(inode->i_mapping, 0);
2141 if (page) {
2142 if (PageUptodate(page)) {
2143 *pinned_page = page;
2144 goto out;
2145 }
2146 page_cache_release(page);
2147 }
2148 /*
2149 * drop cap refs first because getattr while holding
2150 * caps refs can cause deadlock.
2151 */
2152 ceph_put_cap_refs(ci, _got);
2153 _got = 0;
2154
2155 /* getattr request will bring inline data into page cache */
2156 ret1 = __ceph_do_getattr(inode, NULL,
2157 CEPH_STAT_CAP_INLINE_DATA, true);
2158 if (ret1 >= 0) {
2159 ret = 0;
2160 goto again;
2161 }
2162 *err = ret1;
2163 ret = 1;
2164 }
2165out:
2133 dout("get_cap_refs %p ret %d got %s\n", inode, 2166 dout("get_cap_refs %p ret %d got %s\n", inode,
2134 ret, ceph_cap_string(*got)); 2167 ret, ceph_cap_string(_got));
2168 *got = _got;
2135 return ret; 2169 return ret;
2136} 2170}
2137 2171
@@ -2168,8 +2202,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2168 * due to a small max_size, make sure we check_max_size (and possibly 2202 * due to a small max_size, make sure we check_max_size (and possibly
2169 * ask the mds) so we don't get hung up indefinitely. 2203 * ask the mds) so we don't get hung up indefinitely.
2170 */ 2204 */
2171int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got, 2205int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
2172 loff_t endoff) 2206 loff_t endoff, int *got, struct page **pinned_page)
2173{ 2207{
2174 int check_max, ret, err; 2208 int check_max, ret, err;
2175 2209
@@ -2179,8 +2213,8 @@ retry:
2179 check_max = 0; 2213 check_max = 0;
2180 err = 0; 2214 err = 0;
2181 ret = wait_event_interruptible(ci->i_cap_wq, 2215 ret = wait_event_interruptible(ci->i_cap_wq,
2182 try_get_cap_refs(ci, need, want, 2216 try_get_cap_refs(ci, need, want, endoff,
2183 got, endoff, 2217 got, pinned_page,
2184 &check_max, &err)); 2218 &check_max, &err));
2185 if (err) 2219 if (err)
2186 ret = err; 2220 ret = err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c03ac4c4bcd1..861b9954a63a 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -805,6 +805,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
805 size_t len = iocb->ki_nbytes; 805 size_t len = iocb->ki_nbytes;
806 struct inode *inode = file_inode(filp); 806 struct inode *inode = file_inode(filp);
807 struct ceph_inode_info *ci = ceph_inode(inode); 807 struct ceph_inode_info *ci = ceph_inode(inode);
808 struct page *pinned_page = NULL;
808 ssize_t ret; 809 ssize_t ret;
809 int want, got = 0; 810 int want, got = 0;
810 int checkeof = 0, read = 0; 811 int checkeof = 0, read = 0;
@@ -817,7 +818,7 @@ again:
817 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 818 want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
818 else 819 else
819 want = CEPH_CAP_FILE_CACHE; 820 want = CEPH_CAP_FILE_CACHE;
820 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1); 821 ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
821 if (ret < 0) 822 if (ret < 0)
822 return ret; 823 return ret;
823 824
@@ -840,6 +841,10 @@ again:
840 } 841 }
841 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", 842 dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
842 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); 843 inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
844 if (pinned_page) {
845 page_cache_release(pinned_page);
846 pinned_page = NULL;
847 }
843 ceph_put_cap_refs(ci, got); 848 ceph_put_cap_refs(ci, got);
844 849
845 if (checkeof && ret >= 0) { 850 if (checkeof && ret >= 0) {
@@ -924,7 +929,8 @@ retry_snap:
924 else 929 else
925 want = CEPH_CAP_FILE_BUFFER; 930 want = CEPH_CAP_FILE_BUFFER;
926 got = 0; 931 got = 0;
927 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count); 932 err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
933 &got, NULL);
928 if (err < 0) 934 if (err < 0)
929 goto out; 935 goto out;
930 936
@@ -1225,7 +1231,7 @@ static long ceph_fallocate(struct file *file, int mode,
1225 else 1231 else
1226 want = CEPH_CAP_FILE_BUFFER; 1232 want = CEPH_CAP_FILE_BUFFER;
1227 1233
1228 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); 1234 ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
1229 if (ret < 0) 1235 if (ret < 0)
1230 goto unlock; 1236 goto unlock;
1231 1237