diff options
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- | fs/ceph/caps.c | 90 |
1 files changed, 60 insertions, 30 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b03973..5e9da996a151 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) | |||
814 | used |= CEPH_CAP_PIN; | 814 | used |= CEPH_CAP_PIN; |
815 | if (ci->i_rd_ref) | 815 | if (ci->i_rd_ref) |
816 | used |= CEPH_CAP_FILE_RD; | 816 | used |= CEPH_CAP_FILE_RD; |
817 | if (ci->i_rdcache_ref || ci->i_rdcache_gen) | 817 | if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) |
818 | used |= CEPH_CAP_FILE_CACHE; | 818 | used |= CEPH_CAP_FILE_CACHE; |
819 | if (ci->i_wr_ref) | 819 | if (ci->i_wr_ref) |
820 | used |= CEPH_CAP_FILE_WR; | 820 | used |= CEPH_CAP_FILE_WR; |
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1082 | gid_t gid; | 1082 | gid_t gid; |
1083 | struct ceph_mds_session *session; | 1083 | struct ceph_mds_session *session; |
1084 | u64 xattr_version = 0; | 1084 | u64 xattr_version = 0; |
1085 | struct ceph_buffer *xattr_blob = NULL; | ||
1085 | int delayed = 0; | 1086 | int delayed = 0; |
1086 | u64 flush_tid = 0; | 1087 | u64 flush_tid = 0; |
1087 | int i; | 1088 | int i; |
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1142 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1143 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1143 | if (flushing & (1 << i)) | 1144 | if (flushing & (1 << i)) |
1144 | ci->i_cap_flush_tid[i] = flush_tid; | 1145 | ci->i_cap_flush_tid[i] = flush_tid; |
1146 | |||
1147 | follows = ci->i_head_snapc->seq; | ||
1148 | } else { | ||
1149 | follows = 0; | ||
1145 | } | 1150 | } |
1146 | 1151 | ||
1147 | keep = cap->implemented; | 1152 | keep = cap->implemented; |
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1155 | mtime = inode->i_mtime; | 1160 | mtime = inode->i_mtime; |
1156 | atime = inode->i_atime; | 1161 | atime = inode->i_atime; |
1157 | time_warp_seq = ci->i_time_warp_seq; | 1162 | time_warp_seq = ci->i_time_warp_seq; |
1158 | follows = ci->i_snap_realm->cached_context->seq; | ||
1159 | uid = inode->i_uid; | 1163 | uid = inode->i_uid; |
1160 | gid = inode->i_gid; | 1164 | gid = inode->i_gid; |
1161 | mode = inode->i_mode; | 1165 | mode = inode->i_mode; |
1162 | 1166 | ||
1163 | if (dropping & CEPH_CAP_XATTR_EXCL) { | 1167 | if (flushing & CEPH_CAP_XATTR_EXCL) { |
1164 | __ceph_build_xattrs_blob(ci); | 1168 | __ceph_build_xattrs_blob(ci); |
1165 | xattr_version = ci->i_xattrs.version + 1; | 1169 | xattr_blob = ci->i_xattrs.blob; |
1170 | xattr_version = ci->i_xattrs.version; | ||
1166 | } | 1171 | } |
1167 | 1172 | ||
1168 | spin_unlock(&inode->i_lock); | 1173 | spin_unlock(&inode->i_lock); |
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1170 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, | 1175 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, |
1171 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, | 1176 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, |
1172 | size, max_size, &mtime, &atime, time_warp_seq, | 1177 | size, max_size, &mtime, &atime, time_warp_seq, |
1173 | uid, gid, mode, | 1178 | uid, gid, mode, xattr_version, xattr_blob, |
1174 | xattr_version, | ||
1175 | (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, | ||
1176 | follows); | 1179 | follows); |
1177 | if (ret < 0) { | 1180 | if (ret < 0) { |
1178 | dout("error sending cap msg, must requeue %p\n", inode); | 1181 | dout("error sending cap msg, must requeue %p\n", inode); |
@@ -1192,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1192 | * asynchronously back to the MDS once sync writes complete and dirty | 1195 | * asynchronously back to the MDS once sync writes complete and dirty |
1193 | * data is written out. | 1196 | * data is written out. |
1194 | * | 1197 | * |
1198 | * Unless @again is true, skip cap_snaps that were already sent to | ||
1199 | * the MDS (i.e., during this session). | ||
1200 | * | ||
1195 | * Called under i_lock. Takes s_mutex as needed. | 1201 | * Called under i_lock. Takes s_mutex as needed. |
1196 | */ | 1202 | */ |
1197 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1203 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1198 | struct ceph_mds_session **psession) | 1204 | struct ceph_mds_session **psession, |
1205 | int again) | ||
1199 | __releases(ci->vfs_inode->i_lock) | 1206 | __releases(ci->vfs_inode->i_lock) |
1200 | __acquires(ci->vfs_inode->i_lock) | 1207 | __acquires(ci->vfs_inode->i_lock) |
1201 | { | 1208 | { |
@@ -1224,7 +1231,7 @@ retry: | |||
1224 | * pages to be written out. | 1231 | * pages to be written out. |
1225 | */ | 1232 | */ |
1226 | if (capsnap->dirty_pages || capsnap->writing) | 1233 | if (capsnap->dirty_pages || capsnap->writing) |
1227 | continue; | 1234 | break; |
1228 | 1235 | ||
1229 | /* | 1236 | /* |
1230 | * if cap writeback already occurred, we should have dropped | 1237 | * if cap writeback already occurred, we should have dropped |
@@ -1237,6 +1244,13 @@ retry: | |||
1237 | dout("no auth cap (migrating?), doing nothing\n"); | 1244 | dout("no auth cap (migrating?), doing nothing\n"); |
1238 | goto out; | 1245 | goto out; |
1239 | } | 1246 | } |
1247 | |||
1248 | /* only flush each capsnap once */ | ||
1249 | if (!again && !list_empty(&capsnap->flushing_item)) { | ||
1250 | dout("already flushed %p, skipping\n", capsnap); | ||
1251 | continue; | ||
1252 | } | ||
1253 | |||
1240 | mds = ci->i_auth_cap->session->s_mds; | 1254 | mds = ci->i_auth_cap->session->s_mds; |
1241 | mseq = ci->i_auth_cap->mseq; | 1255 | mseq = ci->i_auth_cap->mseq; |
1242 | 1256 | ||
@@ -1273,8 +1287,8 @@ retry: | |||
1273 | &session->s_cap_snaps_flushing); | 1287 | &session->s_cap_snaps_flushing); |
1274 | spin_unlock(&inode->i_lock); | 1288 | spin_unlock(&inode->i_lock); |
1275 | 1289 | ||
1276 | dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", | 1290 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", |
1277 | inode, capsnap, next_follows, capsnap->size); | 1291 | inode, capsnap, capsnap->follows, capsnap->flush_tid); |
1278 | send_cap_msg(session, ceph_vino(inode).ino, 0, | 1292 | send_cap_msg(session, ceph_vino(inode).ino, 0, |
1279 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | 1293 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, |
1280 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, | 1294 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, |
@@ -1282,7 +1296,7 @@ retry: | |||
1282 | &capsnap->mtime, &capsnap->atime, | 1296 | &capsnap->mtime, &capsnap->atime, |
1283 | capsnap->time_warp_seq, | 1297 | capsnap->time_warp_seq, |
1284 | capsnap->uid, capsnap->gid, capsnap->mode, | 1298 | capsnap->uid, capsnap->gid, capsnap->mode, |
1285 | 0, NULL, | 1299 | capsnap->xattr_version, capsnap->xattr_blob, |
1286 | capsnap->follows); | 1300 | capsnap->follows); |
1287 | 1301 | ||
1288 | next_follows = capsnap->follows + 1; | 1302 | next_follows = capsnap->follows + 1; |
@@ -1311,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1311 | struct inode *inode = &ci->vfs_inode; | 1325 | struct inode *inode = &ci->vfs_inode; |
1312 | 1326 | ||
1313 | spin_lock(&inode->i_lock); | 1327 | spin_lock(&inode->i_lock); |
1314 | __ceph_flush_snaps(ci, NULL); | 1328 | __ceph_flush_snaps(ci, NULL, 0); |
1315 | spin_unlock(&inode->i_lock); | 1329 | spin_unlock(&inode->i_lock); |
1316 | } | 1330 | } |
1317 | 1331 | ||
@@ -1332,7 +1346,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1332 | ceph_cap_string(was | mask)); | 1346 | ceph_cap_string(was | mask)); |
1333 | ci->i_dirty_caps |= mask; | 1347 | ci->i_dirty_caps |= mask; |
1334 | if (was == 0) { | 1348 | if (was == 0) { |
1335 | dout(" inode %p now dirty\n", &ci->vfs_inode); | 1349 | if (!ci->i_head_snapc) |
1350 | ci->i_head_snapc = ceph_get_snap_context( | ||
1351 | ci->i_snap_realm->cached_context); | ||
1352 | dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, | ||
1353 | ci->i_head_snapc); | ||
1336 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1354 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
1337 | spin_lock(&mdsc->cap_dirty_lock); | 1355 | spin_lock(&mdsc->cap_dirty_lock); |
1338 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1356 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
@@ -1470,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1470 | 1488 | ||
1471 | /* flush snaps first time around only */ | 1489 | /* flush snaps first time around only */ |
1472 | if (!list_empty(&ci->i_cap_snaps)) | 1490 | if (!list_empty(&ci->i_cap_snaps)) |
1473 | __ceph_flush_snaps(ci, &session); | 1491 | __ceph_flush_snaps(ci, &session, 0); |
1474 | goto retry_locked; | 1492 | goto retry_locked; |
1475 | retry: | 1493 | retry: |
1476 | spin_lock(&inode->i_lock); | 1494 | spin_lock(&inode->i_lock); |
@@ -1887,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | |||
1887 | if (cap && cap->session == session) { | 1905 | if (cap && cap->session == session) { |
1888 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, | 1906 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, |
1889 | cap, capsnap); | 1907 | cap, capsnap); |
1890 | __ceph_flush_snaps(ci, &session); | 1908 | __ceph_flush_snaps(ci, &session, 1); |
1891 | } else { | 1909 | } else { |
1892 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 1910 | pr_err("%p auth cap %p not mds%d ???\n", inode, |
1893 | cap, session->s_mds); | 1911 | cap, session->s_mds); |
@@ -2190,7 +2208,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
2190 | 2208 | ||
2191 | if (ci->i_head_snapc == snapc) { | 2209 | if (ci->i_head_snapc == snapc) { |
2192 | ci->i_wrbuffer_ref_head -= nr; | 2210 | ci->i_wrbuffer_ref_head -= nr; |
2193 | if (!ci->i_wrbuffer_ref_head) { | 2211 | if (ci->i_wrbuffer_ref_head == 0 && |
2212 | ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { | ||
2213 | BUG_ON(!ci->i_head_snapc); | ||
2194 | ceph_put_snap_context(ci->i_head_snapc); | 2214 | ceph_put_snap_context(ci->i_head_snapc); |
2195 | ci->i_head_snapc = NULL; | 2215 | ci->i_head_snapc = NULL; |
2196 | } | 2216 | } |
@@ -2263,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2263 | { | 2283 | { |
2264 | struct ceph_inode_info *ci = ceph_inode(inode); | 2284 | struct ceph_inode_info *ci = ceph_inode(inode); |
2265 | int mds = session->s_mds; | 2285 | int mds = session->s_mds; |
2266 | int seq = le32_to_cpu(grant->seq); | 2286 | unsigned seq = le32_to_cpu(grant->seq); |
2287 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2267 | int newcaps = le32_to_cpu(grant->caps); | 2288 | int newcaps = le32_to_cpu(grant->caps); |
2268 | int issued, implemented, used, wanted, dirty; | 2289 | int issued, implemented, used, wanted, dirty; |
2269 | u64 size = le64_to_cpu(grant->size); | 2290 | u64 size = le64_to_cpu(grant->size); |
@@ -2275,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2275 | int revoked_rdcache = 0; | 2296 | int revoked_rdcache = 0; |
2276 | int queue_invalidate = 0; | 2297 | int queue_invalidate = 0; |
2277 | 2298 | ||
2278 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", | 2299 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", |
2279 | inode, cap, mds, seq, ceph_cap_string(newcaps)); | 2300 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); |
2280 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2301 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2281 | inode->i_size); | 2302 | inode->i_size); |
2282 | 2303 | ||
@@ -2372,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2372 | } | 2393 | } |
2373 | 2394 | ||
2374 | cap->seq = seq; | 2395 | cap->seq = seq; |
2396 | cap->issue_seq = issue_seq; | ||
2375 | 2397 | ||
2376 | /* file layout may have changed */ | 2398 | /* file layout may have changed */ |
2377 | ci->i_layout = grant->layout; | 2399 | ci->i_layout = grant->layout; |
@@ -2483,6 +2505,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2483 | dout(" inode %p now clean\n", inode); | 2505 | dout(" inode %p now clean\n", inode); |
2484 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 2506 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
2485 | drop = 1; | 2507 | drop = 1; |
2508 | if (ci->i_wrbuffer_ref_head == 0) { | ||
2509 | BUG_ON(!ci->i_head_snapc); | ||
2510 | ceph_put_snap_context(ci->i_head_snapc); | ||
2511 | ci->i_head_snapc = NULL; | ||
2512 | } | ||
2486 | } else { | 2513 | } else { |
2487 | BUG_ON(list_empty(&ci->i_dirty_item)); | 2514 | BUG_ON(list_empty(&ci->i_dirty_item)); |
2488 | } | 2515 | } |
@@ -2749,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2749 | if (op == CEPH_CAP_OP_IMPORT) | 2776 | if (op == CEPH_CAP_OP_IMPORT) |
2750 | __queue_cap_release(session, vino.ino, cap_id, | 2777 | __queue_cap_release(session, vino.ino, cap_id, |
2751 | mseq, seq); | 2778 | mseq, seq); |
2752 | 2779 | goto flush_cap_releases; | |
2753 | /* | ||
2754 | * send any full release message to try to move things | ||
2755 | * along for the mds (who clearly thinks we still have this | ||
2756 | * cap). | ||
2757 | */ | ||
2758 | ceph_add_cap_releases(mdsc, session); | ||
2759 | ceph_send_cap_releases(mdsc, session); | ||
2760 | goto done; | ||
2761 | } | 2780 | } |
2762 | 2781 | ||
2763 | /* these will work even if we don't have a cap yet */ | 2782 | /* these will work even if we don't have a cap yet */ |
@@ -2785,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2785 | dout(" no cap on %p ino %llx.%llx from mds%d\n", | 2804 | dout(" no cap on %p ino %llx.%llx from mds%d\n", |
2786 | inode, ceph_ino(inode), ceph_snap(inode), mds); | 2805 | inode, ceph_ino(inode), ceph_snap(inode), mds); |
2787 | spin_unlock(&inode->i_lock); | 2806 | spin_unlock(&inode->i_lock); |
2788 | goto done; | 2807 | goto flush_cap_releases; |
2789 | } | 2808 | } |
2790 | 2809 | ||
2791 | /* note that each of these drops i_lock for us */ | 2810 | /* note that each of these drops i_lock for us */ |
@@ -2809,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2809 | ceph_cap_op_name(op)); | 2828 | ceph_cap_op_name(op)); |
2810 | } | 2829 | } |
2811 | 2830 | ||
2831 | goto done; | ||
2832 | |||
2833 | flush_cap_releases: | ||
2834 | /* | ||
2835 | * send any full release message to try to move things | ||
2836 | * along for the mds (who clearly thinks we still have this | ||
2837 | * cap). | ||
2838 | */ | ||
2839 | ceph_add_cap_releases(mdsc, session); | ||
2840 | ceph_send_cap_releases(mdsc, session); | ||
2841 | |||
2812 | done: | 2842 | done: |
2813 | mutex_unlock(&session->s_mutex); | 2843 | mutex_unlock(&session->s_mutex); |
2814 | done_unlocked: | 2844 | done_unlocked: |