about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph/caps.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/caps.c')
-rw-r--r-- fs/ceph/caps.c 90
1 files changed, 60 insertions, 30 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7bf182b03973..5e9da996a151 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
814 used |= CEPH_CAP_PIN; 814 used |= CEPH_CAP_PIN;
815 if (ci->i_rd_ref) 815 if (ci->i_rd_ref)
816 used |= CEPH_CAP_FILE_RD; 816 used |= CEPH_CAP_FILE_RD;
817 if (ci->i_rdcache_ref || ci->i_rdcache_gen) 817 if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 used |= CEPH_CAP_FILE_CACHE; 818 used |= CEPH_CAP_FILE_CACHE;
819 if (ci->i_wr_ref) 819 if (ci->i_wr_ref)
820 used |= CEPH_CAP_FILE_WR; 820 used |= CEPH_CAP_FILE_WR;
@@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1082 gid_t gid; 1082 gid_t gid;
1083 struct ceph_mds_session *session; 1083 struct ceph_mds_session *session;
1084 u64 xattr_version = 0; 1084 u64 xattr_version = 0;
1085 struct ceph_buffer *xattr_blob = NULL;
1085 int delayed = 0; 1086 int delayed = 0;
1086 u64 flush_tid = 0; 1087 u64 flush_tid = 0;
1087 int i; 1088 int i;
@@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1142 for (i = 0; i < CEPH_CAP_BITS; i++) 1143 for (i = 0; i < CEPH_CAP_BITS; i++)
1143 if (flushing & (1 << i)) 1144 if (flushing & (1 << i))
1144 ci->i_cap_flush_tid[i] = flush_tid; 1145 ci->i_cap_flush_tid[i] = flush_tid;
1146
1147 follows = ci->i_head_snapc->seq;
1148 } else {
1149 follows = 0;
1145 } 1150 }
1146 1151
1147 keep = cap->implemented; 1152 keep = cap->implemented;
@@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1155 mtime = inode->i_mtime; 1160 mtime = inode->i_mtime;
1156 atime = inode->i_atime; 1161 atime = inode->i_atime;
1157 time_warp_seq = ci->i_time_warp_seq; 1162 time_warp_seq = ci->i_time_warp_seq;
1158 follows = ci->i_snap_realm->cached_context->seq;
1159 uid = inode->i_uid; 1163 uid = inode->i_uid;
1160 gid = inode->i_gid; 1164 gid = inode->i_gid;
1161 mode = inode->i_mode; 1165 mode = inode->i_mode;
1162 1166
1163 if (dropping & CEPH_CAP_XATTR_EXCL) { 1167 if (flushing & CEPH_CAP_XATTR_EXCL) {
1164 __ceph_build_xattrs_blob(ci); 1168 __ceph_build_xattrs_blob(ci);
1165 xattr_version = ci->i_xattrs.version + 1; 1169 xattr_blob = ci->i_xattrs.blob;
1170 xattr_version = ci->i_xattrs.version;
1166 } 1171 }
1167 1172
1168 spin_unlock(&inode->i_lock); 1173 spin_unlock(&inode->i_lock);
@@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1171 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
1172 size, max_size, &mtime, &atime, time_warp_seq, 1177 size, max_size, &mtime, &atime, time_warp_seq,
1173 uid, gid, mode, 1178 uid, gid, mode, xattr_version, xattr_blob,
1174 xattr_version,
1175 (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL,
1176 follows); 1179 follows);
1177 if (ret < 0) { 1180 if (ret < 0) {
1178 dout("error sending cap msg, must requeue %p\n", inode); 1181 dout("error sending cap msg, must requeue %p\n", inode);
@@ -1192,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1192 * asynchronously back to the MDS once sync writes complete and dirty 1195 * asynchronously back to the MDS once sync writes complete and dirty
1193 * data is written out. 1196 * data is written out.
1194 * 1197 *
1198 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session).
1200 *
1195 * Called under i_lock. Takes s_mutex as needed. 1201 * Called under i_lock. Takes s_mutex as needed.
1196 */ 1202 */
1197void __ceph_flush_snaps(struct ceph_inode_info *ci, 1203void __ceph_flush_snaps(struct ceph_inode_info *ci,
1198 struct ceph_mds_session **psession) 1204 struct ceph_mds_session **psession,
1205 int again)
1199 __releases(ci->vfs_inode->i_lock) 1206 __releases(ci->vfs_inode->i_lock)
1200 __acquires(ci->vfs_inode->i_lock) 1207 __acquires(ci->vfs_inode->i_lock)
1201{ 1208{
@@ -1224,7 +1231,7 @@ retry:
1224 * pages to be written out. 1231 * pages to be written out.
1225 */ 1232 */
1226 if (capsnap->dirty_pages || capsnap->writing) 1233 if (capsnap->dirty_pages || capsnap->writing)
1227 continue; 1234 break;
1228 1235
1229 /* 1236 /*
1230 * if cap writeback already occurred, we should have dropped 1237 * if cap writeback already occurred, we should have dropped
@@ -1237,6 +1244,13 @@ retry:
1237 dout("no auth cap (migrating?), doing nothing\n"); 1244 dout("no auth cap (migrating?), doing nothing\n");
1238 goto out; 1245 goto out;
1239 } 1246 }
1247
1248 /* only flush each capsnap once */
1249 if (!again && !list_empty(&capsnap->flushing_item)) {
1250 dout("already flushed %p, skipping\n", capsnap);
1251 continue;
1252 }
1253
1240 mds = ci->i_auth_cap->session->s_mds; 1254 mds = ci->i_auth_cap->session->s_mds;
1241 mseq = ci->i_auth_cap->mseq; 1255 mseq = ci->i_auth_cap->mseq;
1242 1256
@@ -1273,8 +1287,8 @@ retry:
1273 &session->s_cap_snaps_flushing); 1287 &session->s_cap_snaps_flushing);
1274 spin_unlock(&inode->i_lock); 1288 spin_unlock(&inode->i_lock);
1275 1289
1276 dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", 1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1277 inode, capsnap, next_follows, capsnap->size); 1291 inode, capsnap, capsnap->follows, capsnap->flush_tid);
1278 send_cap_msg(session, ceph_vino(inode).ino, 0, 1292 send_cap_msg(session, ceph_vino(inode).ino, 0,
1279 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1293 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1280 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, 1294 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1282,7 +1296,7 @@ retry:
1282 &capsnap->mtime, &capsnap->atime, 1296 &capsnap->mtime, &capsnap->atime,
1283 capsnap->time_warp_seq, 1297 capsnap->time_warp_seq,
1284 capsnap->uid, capsnap->gid, capsnap->mode, 1298 capsnap->uid, capsnap->gid, capsnap->mode,
1285 0, NULL, 1299 capsnap->xattr_version, capsnap->xattr_blob,
1286 capsnap->follows); 1300 capsnap->follows);
1287 1301
1288 next_follows = capsnap->follows + 1; 1302 next_follows = capsnap->follows + 1;
@@ -1311,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1311 struct inode *inode = &ci->vfs_inode; 1325 struct inode *inode = &ci->vfs_inode;
1312 1326
1313 spin_lock(&inode->i_lock); 1327 spin_lock(&inode->i_lock);
1314 __ceph_flush_snaps(ci, NULL); 1328 __ceph_flush_snaps(ci, NULL, 0);
1315 spin_unlock(&inode->i_lock); 1329 spin_unlock(&inode->i_lock);
1316} 1330}
1317 1331
@@ -1332,7 +1346,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1332 ceph_cap_string(was | mask)); 1346 ceph_cap_string(was | mask));
1333 ci->i_dirty_caps |= mask; 1347 ci->i_dirty_caps |= mask;
1334 if (was == 0) { 1348 if (was == 0) {
1335 dout(" inode %p now dirty\n", &ci->vfs_inode); 1349 if (!ci->i_head_snapc)
1350 ci->i_head_snapc = ceph_get_snap_context(
1351 ci->i_snap_realm->cached_context);
1352 dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode,
1353 ci->i_head_snapc);
1336 BUG_ON(!list_empty(&ci->i_dirty_item)); 1354 BUG_ON(!list_empty(&ci->i_dirty_item));
1337 spin_lock(&mdsc->cap_dirty_lock); 1355 spin_lock(&mdsc->cap_dirty_lock);
1338 list_add(&ci->i_dirty_item, &mdsc->cap_dirty); 1356 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1470,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1470 1488
1471 /* flush snaps first time around only */ 1489 /* flush snaps first time around only */
1472 if (!list_empty(&ci->i_cap_snaps)) 1490 if (!list_empty(&ci->i_cap_snaps))
1473 __ceph_flush_snaps(ci, &session); 1491 __ceph_flush_snaps(ci, &session, 0);
1474 goto retry_locked; 1492 goto retry_locked;
1475retry: 1493retry:
1476 spin_lock(&inode->i_lock); 1494 spin_lock(&inode->i_lock);
@@ -1887,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1887 if (cap && cap->session == session) { 1905 if (cap && cap->session == session) {
1888 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1906 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1889 cap, capsnap); 1907 cap, capsnap);
1890 __ceph_flush_snaps(ci, &session); 1908 __ceph_flush_snaps(ci, &session, 1);
1891 } else { 1909 } else {
1892 pr_err("%p auth cap %p not mds%d ???\n", inode, 1910 pr_err("%p auth cap %p not mds%d ???\n", inode,
1893 cap, session->s_mds); 1911 cap, session->s_mds);
@@ -2190,7 +2208,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2190 2208
2191 if (ci->i_head_snapc == snapc) { 2209 if (ci->i_head_snapc == snapc) {
2192 ci->i_wrbuffer_ref_head -= nr; 2210 ci->i_wrbuffer_ref_head -= nr;
2193 if (!ci->i_wrbuffer_ref_head) { 2211 if (ci->i_wrbuffer_ref_head == 0 &&
2212 ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) {
2213 BUG_ON(!ci->i_head_snapc);
2194 ceph_put_snap_context(ci->i_head_snapc); 2214 ceph_put_snap_context(ci->i_head_snapc);
2195 ci->i_head_snapc = NULL; 2215 ci->i_head_snapc = NULL;
2196 } 2216 }
@@ -2263,7 +2283,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2263{ 2283{
2264 struct ceph_inode_info *ci = ceph_inode(inode); 2284 struct ceph_inode_info *ci = ceph_inode(inode);
2265 int mds = session->s_mds; 2285 int mds = session->s_mds;
2266 int seq = le32_to_cpu(grant->seq); 2286 unsigned seq = le32_to_cpu(grant->seq);
2287 unsigned issue_seq = le32_to_cpu(grant->issue_seq);
2267 int newcaps = le32_to_cpu(grant->caps); 2288 int newcaps = le32_to_cpu(grant->caps);
2268 int issued, implemented, used, wanted, dirty; 2289 int issued, implemented, used, wanted, dirty;
2269 u64 size = le64_to_cpu(grant->size); 2290 u64 size = le64_to_cpu(grant->size);
@@ -2275,8 +2296,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2275 int revoked_rdcache = 0; 2296 int revoked_rdcache = 0;
2276 int queue_invalidate = 0; 2297 int queue_invalidate = 0;
2277 2298
2278 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2299 dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
2279 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2300 inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
2280 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2301 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2281 inode->i_size); 2302 inode->i_size);
2282 2303
@@ -2372,6 +2393,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2372 } 2393 }
2373 2394
2374 cap->seq = seq; 2395 cap->seq = seq;
2396 cap->issue_seq = issue_seq;
2375 2397
2376 /* file layout may have changed */ 2398 /* file layout may have changed */
2377 ci->i_layout = grant->layout; 2399 ci->i_layout = grant->layout;
@@ -2483,6 +2505,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 dout(" inode %p now clean\n", inode); 2505 dout(" inode %p now clean\n", inode);
2484 BUG_ON(!list_empty(&ci->i_dirty_item)); 2506 BUG_ON(!list_empty(&ci->i_dirty_item));
2485 drop = 1; 2507 drop = 1;
2508 if (ci->i_wrbuffer_ref_head == 0) {
2509 BUG_ON(!ci->i_head_snapc);
2510 ceph_put_snap_context(ci->i_head_snapc);
2511 ci->i_head_snapc = NULL;
2512 }
2486 } else { 2513 } else {
2487 BUG_ON(list_empty(&ci->i_dirty_item)); 2514 BUG_ON(list_empty(&ci->i_dirty_item));
2488 } 2515 }
@@ -2749,15 +2776,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2749 if (op == CEPH_CAP_OP_IMPORT) 2776 if (op == CEPH_CAP_OP_IMPORT)
2750 __queue_cap_release(session, vino.ino, cap_id, 2777 __queue_cap_release(session, vino.ino, cap_id,
2751 mseq, seq); 2778 mseq, seq);
2752 2779 goto flush_cap_releases;
2753 /*
2754 * send any full release message to try to move things
2755 * along for the mds (who clearly thinks we still have this
2756 * cap).
2757 */
2758 ceph_add_cap_releases(mdsc, session);
2759 ceph_send_cap_releases(mdsc, session);
2760 goto done;
2761 } 2780 }
2762 2781
2763 /* these will work even if we don't have a cap yet */ 2782 /* these will work even if we don't have a cap yet */
@@ -2785,7 +2804,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2785 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2804 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2786 inode, ceph_ino(inode), ceph_snap(inode), mds); 2805 inode, ceph_ino(inode), ceph_snap(inode), mds);
2787 spin_unlock(&inode->i_lock); 2806 spin_unlock(&inode->i_lock);
2788 goto done; 2807 goto flush_cap_releases;
2789 } 2808 }
2790 2809
2791 /* note that each of these drops i_lock for us */ 2810 /* note that each of these drops i_lock for us */
@@ -2809,6 +2828,17 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2809 ceph_cap_op_name(op)); 2828 ceph_cap_op_name(op));
2810 } 2829 }
2811 2830
2831 goto done;
2832
2833flush_cap_releases:
2834 /*
2835 * send any full release message to try to move things
2836 * along for the mds (who clearly thinks we still have this
2837 * cap).
2838 */
2839 ceph_add_cap_releases(mdsc, session);
2840 ceph_send_cap_releases(mdsc, session);
2841
2812done: 2842done:
2813 mutex_unlock(&session->s_mutex); 2843 mutex_unlock(&session->s_mutex);
2814done_unlocked: 2844done_unlocked: