diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-28 17:07:20 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-28 17:07:20 -0400 |
| commit | 997396a73a94de7d92d82e30d7bb1d931e38cb16 (patch) | |
| tree | 2190a66e085f16a1985e008be167d6fc4ea6734d | |
| parent | 6f4dbeca1a5bac4552d49d9e7b774da9f6625e74 (diff) | |
| parent | b545787dbb00a041c541a4759d938ddb0108295a (diff) | |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: fix get_ticket_handler() error handling
ceph: don't BUG on ENOMEM during mds reconnect
ceph: ceph_mdsc_build_path() returns an ERR_PTR
ceph: Fix warnings
ceph: ceph_get_inode() returns an ERR_PTR
ceph: initialize fields on new dentry_infos
ceph: maintain i_head_snapc when any caps are dirty, not just for data
ceph: fix osd request lru adjustment when sending request
ceph: don't improperly set dir complete when holding EXCL cap
mm: exporting account_page_dirtied
ceph: direct requests in snapped namespace based on nonsnap parent
ceph: queue cap snap writeback for realm children on snap update
ceph: include dirty xattrs state in snapped caps
ceph: fix xattr cap writeback
ceph: fix multiple mds session shutdown
| -rw-r--r-- | fs/ceph/addr.c | 12 | ||||
| -rw-r--r-- | fs/ceph/auth_x.c | 15 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 32 | ||||
| -rw-r--r-- | fs/ceph/debugfs.c | 4 | ||||
| -rw-r--r-- | fs/ceph/dir.c | 2 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 5 | ||||
| -rw-r--r-- | fs/ceph/locks.c | 14 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 101 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 3 | ||||
| -rw-r--r-- | fs/ceph/osd_client.c | 2 | ||||
| -rw-r--r-- | fs/ceph/snap.c | 89 | ||||
| -rw-r--r-- | fs/ceph/super.h | 11 | ||||
| -rw-r--r-- | fs/ceph/xattr.c | 1 | ||||
| -rw-r--r-- | mm/page-writeback.c | 1 |
14 files changed, 185 insertions, 107 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 5598a0d02295..4cfce1ee31fa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
| 87 | 87 | ||
| 88 | /* dirty the head */ | 88 | /* dirty the head */ |
| 89 | spin_lock(&inode->i_lock); | 89 | spin_lock(&inode->i_lock); |
| 90 | if (ci->i_wrbuffer_ref_head == 0) | 90 | if (ci->i_head_snapc == NULL) |
| 91 | ci->i_head_snapc = ceph_get_snap_context(snapc); | 91 | ci->i_head_snapc = ceph_get_snap_context(snapc); |
| 92 | ++ci->i_wrbuffer_ref_head; | 92 | ++ci->i_wrbuffer_ref_head; |
| 93 | if (ci->i_wrbuffer_ref == 0) | 93 | if (ci->i_wrbuffer_ref == 0) |
| @@ -105,13 +105,7 @@ static int ceph_set_page_dirty(struct page *page) | |||
| 105 | spin_lock_irq(&mapping->tree_lock); | 105 | spin_lock_irq(&mapping->tree_lock); |
| 106 | if (page->mapping) { /* Race with truncate? */ | 106 | if (page->mapping) { /* Race with truncate? */ |
| 107 | WARN_ON_ONCE(!PageUptodate(page)); | 107 | WARN_ON_ONCE(!PageUptodate(page)); |
| 108 | 108 | account_page_dirtied(page, page->mapping); | |
| 109 | if (mapping_cap_account_dirty(mapping)) { | ||
| 110 | __inc_zone_page_state(page, NR_FILE_DIRTY); | ||
| 111 | __inc_bdi_stat(mapping->backing_dev_info, | ||
| 112 | BDI_RECLAIMABLE); | ||
| 113 | task_io_account_write(PAGE_CACHE_SIZE); | ||
| 114 | } | ||
| 115 | radix_tree_tag_set(&mapping->page_tree, | 109 | radix_tree_tag_set(&mapping->page_tree, |
| 116 | page_index(page), PAGECACHE_TAG_DIRTY); | 110 | page_index(page), PAGECACHE_TAG_DIRTY); |
| 117 | 111 | ||
| @@ -352,7 +346,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode, | |||
| 352 | break; | 346 | break; |
| 353 | } | 347 | } |
| 354 | } | 348 | } |
| 355 | if (!snapc && ci->i_head_snapc) { | 349 | if (!snapc && ci->i_wrbuffer_ref_head) { |
| 356 | snapc = ceph_get_snap_context(ci->i_head_snapc); | 350 | snapc = ceph_get_snap_context(ci->i_head_snapc); |
| 357 | dout(" head snapc %p has %d dirty pages\n", | 351 | dout(" head snapc %p has %d dirty pages\n", |
| 358 | snapc, ci->i_wrbuffer_ref_head); | 352 | snapc, ci->i_wrbuffer_ref_head); |
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c index 582e0b2caf8a..a2d002cbdec2 100644 --- a/fs/ceph/auth_x.c +++ b/fs/ceph/auth_x.c | |||
| @@ -376,7 +376,7 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | |||
| 376 | 376 | ||
| 377 | th = get_ticket_handler(ac, service); | 377 | th = get_ticket_handler(ac, service); |
| 378 | 378 | ||
| 379 | if (!th) { | 379 | if (IS_ERR(th)) { |
| 380 | *pneed |= service; | 380 | *pneed |= service; |
| 381 | continue; | 381 | continue; |
| 382 | } | 382 | } |
| @@ -399,6 +399,9 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
| 399 | struct ceph_x_ticket_handler *th = | 399 | struct ceph_x_ticket_handler *th = |
| 400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
| 401 | 401 | ||
| 402 | if (IS_ERR(th)) | ||
| 403 | return PTR_ERR(th); | ||
| 404 | |||
| 402 | ceph_x_validate_tickets(ac, &need); | 405 | ceph_x_validate_tickets(ac, &need); |
| 403 | 406 | ||
| 404 | dout("build_request want %x have %x need %x\n", | 407 | dout("build_request want %x have %x need %x\n", |
| @@ -450,7 +453,6 @@ static int ceph_x_build_request(struct ceph_auth_client *ac, | |||
| 450 | return -ERANGE; | 453 | return -ERANGE; |
| 451 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); | 454 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); |
| 452 | 455 | ||
| 453 | BUG_ON(!th); | ||
| 454 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); | 456 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); |
| 455 | if (ret) | 457 | if (ret) |
| 456 | return ret; | 458 | return ret; |
| @@ -505,7 +507,8 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, | |||
| 505 | 507 | ||
| 506 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: | 508 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: |
| 507 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | 509 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); |
| 508 | BUG_ON(!th); | 510 | if (IS_ERR(th)) |
| 511 | return PTR_ERR(th); | ||
| 509 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, | 512 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, |
| 510 | buf + sizeof(*head), end); | 513 | buf + sizeof(*head), end); |
| 511 | break; | 514 | break; |
| @@ -563,8 +566,8 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, | |||
| 563 | void *end = p + sizeof(au->reply_buf); | 566 | void *end = p + sizeof(au->reply_buf); |
| 564 | 567 | ||
| 565 | th = get_ticket_handler(ac, au->service); | 568 | th = get_ticket_handler(ac, au->service); |
| 566 | if (!th) | 569 | if (IS_ERR(th)) |
| 567 | return -EIO; /* hrm! */ | 570 | return PTR_ERR(th); |
| 568 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); | 571 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); |
| 569 | if (ret < 0) | 572 | if (ret < 0) |
| 570 | return ret; | 573 | return ret; |
| @@ -626,7 +629,7 @@ static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | |||
| 626 | struct ceph_x_ticket_handler *th; | 629 | struct ceph_x_ticket_handler *th; |
| 627 | 630 | ||
| 628 | th = get_ticket_handler(ac, peer_type); | 631 | th = get_ticket_handler(ac, peer_type); |
| 629 | if (th && !IS_ERR(th)) | 632 | if (!IS_ERR(th)) |
| 630 | remove_ticket_handler(ac, th); | 633 | remove_ticket_handler(ac, th); |
| 631 | } | 634 | } |
| 632 | 635 | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bf182b03973..a2069b6680ae 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -1082,6 +1082,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1082 | gid_t gid; | 1082 | gid_t gid; |
| 1083 | struct ceph_mds_session *session; | 1083 | struct ceph_mds_session *session; |
| 1084 | u64 xattr_version = 0; | 1084 | u64 xattr_version = 0; |
| 1085 | struct ceph_buffer *xattr_blob = NULL; | ||
| 1085 | int delayed = 0; | 1086 | int delayed = 0; |
| 1086 | u64 flush_tid = 0; | 1087 | u64 flush_tid = 0; |
| 1087 | int i; | 1088 | int i; |
| @@ -1142,6 +1143,10 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1142 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1143 | for (i = 0; i < CEPH_CAP_BITS; i++) |
| 1143 | if (flushing & (1 << i)) | 1144 | if (flushing & (1 << i)) |
| 1144 | ci->i_cap_flush_tid[i] = flush_tid; | 1145 | ci->i_cap_flush_tid[i] = flush_tid; |
| 1146 | |||
| 1147 | follows = ci->i_head_snapc->seq; | ||
| 1148 | } else { | ||
| 1149 | follows = 0; | ||
| 1145 | } | 1150 | } |
| 1146 | 1151 | ||
| 1147 | keep = cap->implemented; | 1152 | keep = cap->implemented; |
| @@ -1155,14 +1160,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1155 | mtime = inode->i_mtime; | 1160 | mtime = inode->i_mtime; |
| 1156 | atime = inode->i_atime; | 1161 | atime = inode->i_atime; |
| 1157 | time_warp_seq = ci->i_time_warp_seq; | 1162 | time_warp_seq = ci->i_time_warp_seq; |
| 1158 | follows = ci->i_snap_realm->cached_context->seq; | ||
| 1159 | uid = inode->i_uid; | 1163 | uid = inode->i_uid; |
| 1160 | gid = inode->i_gid; | 1164 | gid = inode->i_gid; |
| 1161 | mode = inode->i_mode; | 1165 | mode = inode->i_mode; |
| 1162 | 1166 | ||
| 1163 | if (dropping & CEPH_CAP_XATTR_EXCL) { | 1167 | if (flushing & CEPH_CAP_XATTR_EXCL) { |
| 1164 | __ceph_build_xattrs_blob(ci); | 1168 | __ceph_build_xattrs_blob(ci); |
| 1165 | xattr_version = ci->i_xattrs.version + 1; | 1169 | xattr_blob = ci->i_xattrs.blob; |
| 1170 | xattr_version = ci->i_xattrs.version; | ||
| 1166 | } | 1171 | } |
| 1167 | 1172 | ||
| 1168 | spin_unlock(&inode->i_lock); | 1173 | spin_unlock(&inode->i_lock); |
| @@ -1170,9 +1175,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
| 1170 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, | 1175 | ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, |
| 1171 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, | 1176 | op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, |
| 1172 | size, max_size, &mtime, &atime, time_warp_seq, | 1177 | size, max_size, &mtime, &atime, time_warp_seq, |
| 1173 | uid, gid, mode, | 1178 | uid, gid, mode, xattr_version, xattr_blob, |
| 1174 | xattr_version, | ||
| 1175 | (flushing & CEPH_CAP_XATTR_EXCL) ? ci->i_xattrs.blob : NULL, | ||
| 1176 | follows); | 1179 | follows); |
| 1177 | if (ret < 0) { | 1180 | if (ret < 0) { |
| 1178 | dout("error sending cap msg, must requeue %p\n", inode); | 1181 | dout("error sending cap msg, must requeue %p\n", inode); |
| @@ -1282,7 +1285,7 @@ retry: | |||
| 1282 | &capsnap->mtime, &capsnap->atime, | 1285 | &capsnap->mtime, &capsnap->atime, |
| 1283 | capsnap->time_warp_seq, | 1286 | capsnap->time_warp_seq, |
| 1284 | capsnap->uid, capsnap->gid, capsnap->mode, | 1287 | capsnap->uid, capsnap->gid, capsnap->mode, |
| 1285 | 0, NULL, | 1288 | capsnap->xattr_version, capsnap->xattr_blob, |
| 1286 | capsnap->follows); | 1289 | capsnap->follows); |
| 1287 | 1290 | ||
| 1288 | next_follows = capsnap->follows + 1; | 1291 | next_follows = capsnap->follows + 1; |
| @@ -1332,7 +1335,11 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
| 1332 | ceph_cap_string(was | mask)); | 1335 | ceph_cap_string(was | mask)); |
| 1333 | ci->i_dirty_caps |= mask; | 1336 | ci->i_dirty_caps |= mask; |
| 1334 | if (was == 0) { | 1337 | if (was == 0) { |
| 1335 | dout(" inode %p now dirty\n", &ci->vfs_inode); | 1338 | if (!ci->i_head_snapc) |
| 1339 | ci->i_head_snapc = ceph_get_snap_context( | ||
| 1340 | ci->i_snap_realm->cached_context); | ||
| 1341 | dout(" inode %p now dirty snapc %p\n", &ci->vfs_inode, | ||
| 1342 | ci->i_head_snapc); | ||
| 1336 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 1343 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
| 1337 | spin_lock(&mdsc->cap_dirty_lock); | 1344 | spin_lock(&mdsc->cap_dirty_lock); |
| 1338 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); | 1345 | list_add(&ci->i_dirty_item, &mdsc->cap_dirty); |
| @@ -2190,7 +2197,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
| 2190 | 2197 | ||
| 2191 | if (ci->i_head_snapc == snapc) { | 2198 | if (ci->i_head_snapc == snapc) { |
| 2192 | ci->i_wrbuffer_ref_head -= nr; | 2199 | ci->i_wrbuffer_ref_head -= nr; |
| 2193 | if (!ci->i_wrbuffer_ref_head) { | 2200 | if (ci->i_wrbuffer_ref_head == 0 && |
| 2201 | ci->i_dirty_caps == 0 && ci->i_flushing_caps == 0) { | ||
| 2202 | BUG_ON(!ci->i_head_snapc); | ||
| 2194 | ceph_put_snap_context(ci->i_head_snapc); | 2203 | ceph_put_snap_context(ci->i_head_snapc); |
| 2195 | ci->i_head_snapc = NULL; | 2204 | ci->i_head_snapc = NULL; |
| 2196 | } | 2205 | } |
| @@ -2483,6 +2492,11 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
| 2483 | dout(" inode %p now clean\n", inode); | 2492 | dout(" inode %p now clean\n", inode); |
| 2484 | BUG_ON(!list_empty(&ci->i_dirty_item)); | 2493 | BUG_ON(!list_empty(&ci->i_dirty_item)); |
| 2485 | drop = 1; | 2494 | drop = 1; |
| 2495 | if (ci->i_wrbuffer_ref_head == 0) { | ||
| 2496 | BUG_ON(!ci->i_head_snapc); | ||
| 2497 | ceph_put_snap_context(ci->i_head_snapc); | ||
| 2498 | ci->i_head_snapc = NULL; | ||
| 2499 | } | ||
| 2486 | } else { | 2500 | } else { |
| 2487 | BUG_ON(list_empty(&ci->i_dirty_item)); | 2501 | BUG_ON(list_empty(&ci->i_dirty_item)); |
| 2488 | } | 2502 | } |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 360c4f22718d..6fd8b20a8611 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
| @@ -171,6 +171,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
| 171 | } else if (req->r_dentry) { | 171 | } else if (req->r_dentry) { |
| 172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, | 172 | path = ceph_mdsc_build_path(req->r_dentry, &pathlen, |
| 173 | &pathbase, 0); | 173 | &pathbase, 0); |
| 174 | if (IS_ERR(path)) | ||
| 175 | path = NULL; | ||
| 174 | spin_lock(&req->r_dentry->d_lock); | 176 | spin_lock(&req->r_dentry->d_lock); |
| 175 | seq_printf(s, " #%llx/%.*s (%s)", | 177 | seq_printf(s, " #%llx/%.*s (%s)", |
| 176 | ceph_ino(req->r_dentry->d_parent->d_inode), | 178 | ceph_ino(req->r_dentry->d_parent->d_inode), |
| @@ -187,6 +189,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
| 187 | if (req->r_old_dentry) { | 189 | if (req->r_old_dentry) { |
| 188 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, | 190 | path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, |
| 189 | &pathbase, 0); | 191 | &pathbase, 0); |
| 192 | if (IS_ERR(path)) | ||
| 193 | path = NULL; | ||
| 190 | spin_lock(&req->r_old_dentry->d_lock); | 194 | spin_lock(&req->r_old_dentry->d_lock); |
| 191 | seq_printf(s, " #%llx/%.*s (%s)", | 195 | seq_printf(s, " #%llx/%.*s (%s)", |
| 192 | ceph_ino(req->r_old_dentry->d_parent->d_inode), | 196 | ceph_ino(req->r_old_dentry->d_parent->d_inode), |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 67bbb41d5526..6e4f43ff23ec 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -46,7 +46,7 @@ int ceph_init_dentry(struct dentry *dentry) | |||
| 46 | else | 46 | else |
| 47 | dentry->d_op = &ceph_snap_dentry_ops; | 47 | dentry->d_op = &ceph_snap_dentry_ops; |
| 48 | 48 | ||
| 49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS); | 49 | di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); |
| 50 | if (!di) | 50 | if (!di) |
| 51 | return -ENOMEM; /* oh well */ | 51 | return -ENOMEM; /* oh well */ |
| 52 | 52 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5d893d31e399..e7cca414da03 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -677,6 +677,7 @@ static int fill_inode(struct inode *inode, | |||
| 677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && | 677 | if (ci->i_files == 0 && ci->i_subdirs == 0 && |
| 678 | ceph_snap(inode) == CEPH_NOSNAP && | 678 | ceph_snap(inode) == CEPH_NOSNAP && |
| 679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && | 679 | (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && |
| 680 | (issued & CEPH_CAP_FILE_EXCL) == 0 && | ||
| 680 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { | 681 | (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { |
| 681 | dout(" marking %p complete (empty)\n", inode); | 682 | dout(" marking %p complete (empty)\n", inode); |
| 682 | ci->i_ceph_flags |= CEPH_I_COMPLETE; | 683 | ci->i_ceph_flags |= CEPH_I_COMPLETE; |
| @@ -1229,11 +1230,11 @@ retry_lookup: | |||
| 1229 | in = dn->d_inode; | 1230 | in = dn->d_inode; |
| 1230 | } else { | 1231 | } else { |
| 1231 | in = ceph_get_inode(parent->d_sb, vino); | 1232 | in = ceph_get_inode(parent->d_sb, vino); |
| 1232 | if (in == NULL) { | 1233 | if (IS_ERR(in)) { |
| 1233 | dout("new_inode badness\n"); | 1234 | dout("new_inode badness\n"); |
| 1234 | d_delete(dn); | 1235 | d_delete(dn); |
| 1235 | dput(dn); | 1236 | dput(dn); |
| 1236 | err = -ENOMEM; | 1237 | err = PTR_ERR(in); |
| 1237 | goto out; | 1238 | goto out; |
| 1238 | } | 1239 | } |
| 1239 | dn = splice_dentry(dn, in, NULL); | 1240 | dn = splice_dentry(dn, in, NULL); |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae85af06454f..ff4e753aae92 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
| @@ -82,7 +82,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 82 | length = fl->fl_end - fl->fl_start + 1; | 82 | length = fl->fl_end - fl->fl_start + 1; |
| 83 | 83 | ||
| 84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
| 85 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 85 | (u64)fl->fl_pid, |
| 86 | (u64)(unsigned long)fl->fl_nspid, | ||
| 86 | lock_cmd, fl->fl_start, | 87 | lock_cmd, fl->fl_start, |
| 87 | length, wait); | 88 | length, wait); |
| 88 | if (!err) { | 89 | if (!err) { |
| @@ -92,7 +93,8 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
| 92 | /* undo! This should only happen if the kernel detects | 93 | /* undo! This should only happen if the kernel detects |
| 93 | * local deadlock. */ | 94 | * local deadlock. */ |
| 94 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
| 95 | (u64)fl->fl_pid, (u64)fl->fl_nspid, | 96 | (u64)fl->fl_pid, |
| 97 | (u64)(unsigned long)fl->fl_nspid, | ||
| 96 | CEPH_LOCK_UNLOCK, fl->fl_start, | 98 | CEPH_LOCK_UNLOCK, fl->fl_start, |
| 97 | length, 0); | 99 | length, 0); |
| 98 | dout("got %d on posix_lock_file, undid lock", err); | 100 | dout("got %d on posix_lock_file, undid lock", err); |
| @@ -132,7 +134,8 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 132 | length = fl->fl_end - fl->fl_start + 1; | 134 | length = fl->fl_end - fl->fl_start + 1; |
| 133 | 135 | ||
| 134 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
| 135 | file, (u64)fl->fl_pid, (u64)fl->fl_nspid, | 137 | file, (u64)fl->fl_pid, |
| 138 | (u64)(unsigned long)fl->fl_nspid, | ||
| 136 | lock_cmd, fl->fl_start, | 139 | lock_cmd, fl->fl_start, |
| 137 | length, wait); | 140 | length, wait); |
| 138 | if (!err) { | 141 | if (!err) { |
| @@ -141,7 +144,7 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 141 | ceph_lock_message(CEPH_LOCK_FLOCK, | 144 | ceph_lock_message(CEPH_LOCK_FLOCK, |
| 142 | CEPH_MDS_OP_SETFILELOCK, | 145 | CEPH_MDS_OP_SETFILELOCK, |
| 143 | file, (u64)fl->fl_pid, | 146 | file, (u64)fl->fl_pid, |
| 144 | (u64)fl->fl_nspid, | 147 | (u64)(unsigned long)fl->fl_nspid, |
| 145 | CEPH_LOCK_UNLOCK, fl->fl_start, | 148 | CEPH_LOCK_UNLOCK, fl->fl_start, |
| 146 | length, 0); | 149 | length, 0); |
| 147 | dout("got %d on flock_lock_file_wait, undid lock", err); | 150 | dout("got %d on flock_lock_file_wait, undid lock", err); |
| @@ -235,7 +238,8 @@ int lock_to_ceph_filelock(struct file_lock *lock, | |||
| 235 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | 238 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); |
| 236 | cephlock->client = cpu_to_le64(0); | 239 | cephlock->client = cpu_to_le64(0); |
| 237 | cephlock->pid = cpu_to_le64(lock->fl_pid); | 240 | cephlock->pid = cpu_to_le64(lock->fl_pid); |
| 238 | cephlock->pid_namespace = cpu_to_le64((u64)lock->fl_nspid); | 241 | cephlock->pid_namespace = |
| 242 | cpu_to_le64((u64)(unsigned long)lock->fl_nspid); | ||
| 239 | 243 | ||
| 240 | switch (lock->fl_type) { | 244 | switch (lock->fl_type) { |
| 241 | case F_RDLCK: | 245 | case F_RDLCK: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a75ddbf9fe37..f091b1351786 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -560,6 +560,13 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
| 560 | * | 560 | * |
| 561 | * Called under mdsc->mutex. | 561 | * Called under mdsc->mutex. |
| 562 | */ | 562 | */ |
| 563 | struct dentry *get_nonsnap_parent(struct dentry *dentry) | ||
| 564 | { | ||
| 565 | while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) | ||
| 566 | dentry = dentry->d_parent; | ||
| 567 | return dentry; | ||
| 568 | } | ||
| 569 | |||
| 563 | static int __choose_mds(struct ceph_mds_client *mdsc, | 570 | static int __choose_mds(struct ceph_mds_client *mdsc, |
| 564 | struct ceph_mds_request *req) | 571 | struct ceph_mds_request *req) |
| 565 | { | 572 | { |
| @@ -590,14 +597,29 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
| 590 | if (req->r_inode) { | 597 | if (req->r_inode) { |
| 591 | inode = req->r_inode; | 598 | inode = req->r_inode; |
| 592 | } else if (req->r_dentry) { | 599 | } else if (req->r_dentry) { |
| 593 | if (req->r_dentry->d_inode) { | 600 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
| 601 | |||
| 602 | if (dir->i_sb != mdsc->client->sb) { | ||
| 603 | /* not this fs! */ | ||
| 604 | inode = req->r_dentry->d_inode; | ||
| 605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | ||
| 606 | /* direct snapped/virtual snapdir requests | ||
| 607 | * based on parent dir inode */ | ||
| 608 | struct dentry *dn = | ||
| 609 | get_nonsnap_parent(req->r_dentry->d_parent); | ||
| 610 | inode = dn->d_inode; | ||
| 611 | dout("__choose_mds using nonsnap parent %p\n", inode); | ||
| 612 | } else if (req->r_dentry->d_inode) { | ||
| 613 | /* dentry target */ | ||
| 594 | inode = req->r_dentry->d_inode; | 614 | inode = req->r_dentry->d_inode; |
| 595 | } else { | 615 | } else { |
| 596 | inode = req->r_dentry->d_parent->d_inode; | 616 | /* dir + name */ |
| 617 | inode = dir; | ||
| 597 | hash = req->r_dentry->d_name.hash; | 618 | hash = req->r_dentry->d_name.hash; |
| 598 | is_hash = true; | 619 | is_hash = true; |
| 599 | } | 620 | } |
| 600 | } | 621 | } |
| 622 | |||
| 601 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, | 623 | dout("__choose_mds %p is_hash=%d (%d) mode %d\n", inode, (int)is_hash, |
| 602 | (int)hash, mode); | 624 | (int)hash, mode); |
| 603 | if (!inode) | 625 | if (!inode) |
| @@ -2208,7 +2230,7 @@ static void handle_session(struct ceph_mds_session *session, | |||
| 2208 | pr_info("mds%d reconnect denied\n", session->s_mds); | 2230 | pr_info("mds%d reconnect denied\n", session->s_mds); |
| 2209 | remove_session_caps(session); | 2231 | remove_session_caps(session); |
| 2210 | wake = 1; /* for good measure */ | 2232 | wake = 1; /* for good measure */ |
| 2211 | complete_all(&mdsc->session_close_waiters); | 2233 | wake_up_all(&mdsc->session_close_wq); |
| 2212 | kick_requests(mdsc, mds); | 2234 | kick_requests(mdsc, mds); |
| 2213 | break; | 2235 | break; |
| 2214 | 2236 | ||
| @@ -2302,7 +2324,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2302 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); | 2324 | path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 0); |
| 2303 | if (IS_ERR(path)) { | 2325 | if (IS_ERR(path)) { |
| 2304 | err = PTR_ERR(path); | 2326 | err = PTR_ERR(path); |
| 2305 | BUG_ON(err); | 2327 | goto out_dput; |
| 2306 | } | 2328 | } |
| 2307 | } else { | 2329 | } else { |
| 2308 | path = NULL; | 2330 | path = NULL; |
| @@ -2310,7 +2332,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2310 | } | 2332 | } |
| 2311 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); | 2333 | err = ceph_pagelist_encode_string(pagelist, path, pathlen); |
| 2312 | if (err) | 2334 | if (err) |
| 2313 | goto out; | 2335 | goto out_free; |
| 2314 | 2336 | ||
| 2315 | spin_lock(&inode->i_lock); | 2337 | spin_lock(&inode->i_lock); |
| 2316 | cap->seq = 0; /* reset cap seq */ | 2338 | cap->seq = 0; /* reset cap seq */ |
| @@ -2354,8 +2376,9 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2354 | unlock_kernel(); | 2376 | unlock_kernel(); |
| 2355 | } | 2377 | } |
| 2356 | 2378 | ||
| 2357 | out: | 2379 | out_free: |
| 2358 | kfree(path); | 2380 | kfree(path); |
| 2381 | out_dput: | ||
| 2359 | dput(dentry); | 2382 | dput(dentry); |
| 2360 | return err; | 2383 | return err; |
| 2361 | } | 2384 | } |
| @@ -2876,7 +2899,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
| 2876 | return -ENOMEM; | 2899 | return -ENOMEM; |
| 2877 | 2900 | ||
| 2878 | init_completion(&mdsc->safe_umount_waiters); | 2901 | init_completion(&mdsc->safe_umount_waiters); |
| 2879 | init_completion(&mdsc->session_close_waiters); | 2902 | init_waitqueue_head(&mdsc->session_close_wq); |
| 2880 | INIT_LIST_HEAD(&mdsc->waiting_for_map); | 2903 | INIT_LIST_HEAD(&mdsc->waiting_for_map); |
| 2881 | mdsc->sessions = NULL; | 2904 | mdsc->sessions = NULL; |
| 2882 | mdsc->max_sessions = 0; | 2905 | mdsc->max_sessions = 0; |
| @@ -3021,6 +3044,23 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
| 3021 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); | 3044 | wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); |
| 3022 | } | 3045 | } |
| 3023 | 3046 | ||
| 3047 | /* | ||
| 3048 | * true if all sessions are closed, or we force unmount | ||
| 3049 | */ | ||
| 3050 | bool done_closing_sessions(struct ceph_mds_client *mdsc) | ||
| 3051 | { | ||
| 3052 | int i, n = 0; | ||
| 3053 | |||
| 3054 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
| 3055 | return true; | ||
| 3056 | |||
| 3057 | mutex_lock(&mdsc->mutex); | ||
| 3058 | for (i = 0; i < mdsc->max_sessions; i++) | ||
| 3059 | if (mdsc->sessions[i]) | ||
| 3060 | n++; | ||
| 3061 | mutex_unlock(&mdsc->mutex); | ||
| 3062 | return n == 0; | ||
| 3063 | } | ||
| 3024 | 3064 | ||
| 3025 | /* | 3065 | /* |
| 3026 | * called after sb is ro. | 3066 | * called after sb is ro. |
| @@ -3029,45 +3069,32 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3029 | { | 3069 | { |
| 3030 | struct ceph_mds_session *session; | 3070 | struct ceph_mds_session *session; |
| 3031 | int i; | 3071 | int i; |
| 3032 | int n; | ||
| 3033 | struct ceph_client *client = mdsc->client; | 3072 | struct ceph_client *client = mdsc->client; |
| 3034 | unsigned long started, timeout = client->mount_args->mount_timeout * HZ; | 3073 | unsigned long timeout = client->mount_args->mount_timeout * HZ; |
| 3035 | 3074 | ||
| 3036 | dout("close_sessions\n"); | 3075 | dout("close_sessions\n"); |
| 3037 | 3076 | ||
| 3038 | mutex_lock(&mdsc->mutex); | ||
| 3039 | |||
| 3040 | /* close sessions */ | 3077 | /* close sessions */ |
| 3041 | started = jiffies; | 3078 | mutex_lock(&mdsc->mutex); |
| 3042 | while (time_before(jiffies, started + timeout)) { | 3079 | for (i = 0; i < mdsc->max_sessions; i++) { |
| 3043 | dout("closing sessions\n"); | 3080 | session = __ceph_lookup_mds_session(mdsc, i); |
| 3044 | n = 0; | 3081 | if (!session) |
| 3045 | for (i = 0; i < mdsc->max_sessions; i++) { | 3082 | continue; |
| 3046 | session = __ceph_lookup_mds_session(mdsc, i); | ||
| 3047 | if (!session) | ||
| 3048 | continue; | ||
| 3049 | mutex_unlock(&mdsc->mutex); | ||
| 3050 | mutex_lock(&session->s_mutex); | ||
| 3051 | __close_session(mdsc, session); | ||
| 3052 | mutex_unlock(&session->s_mutex); | ||
| 3053 | ceph_put_mds_session(session); | ||
| 3054 | mutex_lock(&mdsc->mutex); | ||
| 3055 | n++; | ||
| 3056 | } | ||
| 3057 | if (n == 0) | ||
| 3058 | break; | ||
| 3059 | |||
| 3060 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) | ||
| 3061 | break; | ||
| 3062 | |||
| 3063 | dout("waiting for sessions to close\n"); | ||
| 3064 | mutex_unlock(&mdsc->mutex); | 3083 | mutex_unlock(&mdsc->mutex); |
| 3065 | wait_for_completion_timeout(&mdsc->session_close_waiters, | 3084 | mutex_lock(&session->s_mutex); |
| 3066 | timeout); | 3085 | __close_session(mdsc, session); |
| 3086 | mutex_unlock(&session->s_mutex); | ||
| 3087 | ceph_put_mds_session(session); | ||
| 3067 | mutex_lock(&mdsc->mutex); | 3088 | mutex_lock(&mdsc->mutex); |
| 3068 | } | 3089 | } |
| 3090 | mutex_unlock(&mdsc->mutex); | ||
| 3091 | |||
| 3092 | dout("waiting for sessions to close\n"); | ||
| 3093 | wait_event_timeout(mdsc->session_close_wq, done_closing_sessions(mdsc), | ||
| 3094 | timeout); | ||
| 3069 | 3095 | ||
| 3070 | /* tear down remaining sessions */ | 3096 | /* tear down remaining sessions */ |
| 3097 | mutex_lock(&mdsc->mutex); | ||
| 3071 | for (i = 0; i < mdsc->max_sessions; i++) { | 3098 | for (i = 0; i < mdsc->max_sessions; i++) { |
| 3072 | if (mdsc->sessions[i]) { | 3099 | if (mdsc->sessions[i]) { |
| 3073 | session = get_session(mdsc->sessions[i]); | 3100 | session = get_session(mdsc->sessions[i]); |
| @@ -3080,9 +3107,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
| 3080 | mutex_lock(&mdsc->mutex); | 3107 | mutex_lock(&mdsc->mutex); |
| 3081 | } | 3108 | } |
| 3082 | } | 3109 | } |
| 3083 | |||
| 3084 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); | 3110 | WARN_ON(!list_empty(&mdsc->cap_delay_list)); |
| 3085 | |||
| 3086 | mutex_unlock(&mdsc->mutex); | 3111 | mutex_unlock(&mdsc->mutex); |
| 3087 | 3112 | ||
| 3088 | ceph_cleanup_empty_realms(mdsc); | 3113 | ceph_cleanup_empty_realms(mdsc); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ab7e89f5e344..c98267ce6d2a 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -234,7 +234,8 @@ struct ceph_mds_client { | |||
| 234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
| 235 | 235 | ||
| 236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
| 237 | struct completion safe_umount_waiters, session_close_waiters; | 237 | struct completion safe_umount_waiters; |
| 238 | wait_queue_head_t session_close_wq; | ||
| 238 | struct list_head waiting_for_map; | 239 | struct list_head waiting_for_map; |
| 239 | 240 | ||
| 240 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ | 241 | struct ceph_mds_session **sessions; /* NULL for mds if no session */ |
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c index bed6391e52c7..dfced1dacbcd 100644 --- a/fs/ceph/osd_client.c +++ b/fs/ceph/osd_client.c | |||
| @@ -661,7 +661,7 @@ static int __send_request(struct ceph_osd_client *osdc, | |||
| 661 | reqhead->reassert_version = req->r_reassert_version; | 661 | reqhead->reassert_version = req->r_reassert_version; |
| 662 | 662 | ||
| 663 | req->r_stamp = jiffies; | 663 | req->r_stamp = jiffies; |
| 664 | list_move_tail(&osdc->req_lru, &req->r_req_lru_item); | 664 | list_move_tail(&req->r_req_lru_item, &osdc->req_lru); |
| 665 | 665 | ||
| 666 | ceph_msg_get(req->r_request); /* send consumes a ref */ | 666 | ceph_msg_get(req->r_request); /* send consumes a ref */ |
| 667 | ceph_con_send(&req->r_osd->o_con, req->r_request); | 667 | ceph_con_send(&req->r_osd->o_con, req->r_request); |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index c0b26b6badba..4868b9dcac5a 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -435,7 +435,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
| 435 | { | 435 | { |
| 436 | struct inode *inode = &ci->vfs_inode; | 436 | struct inode *inode = &ci->vfs_inode; |
| 437 | struct ceph_cap_snap *capsnap; | 437 | struct ceph_cap_snap *capsnap; |
| 438 | int used; | 438 | int used, dirty; |
| 439 | 439 | ||
| 440 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); | 440 | capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); |
| 441 | if (!capsnap) { | 441 | if (!capsnap) { |
| @@ -445,6 +445,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
| 445 | 445 | ||
| 446 | spin_lock(&inode->i_lock); | 446 | spin_lock(&inode->i_lock); |
| 447 | used = __ceph_caps_used(ci); | 447 | used = __ceph_caps_used(ci); |
| 448 | dirty = __ceph_caps_dirty(ci); | ||
| 448 | if (__ceph_have_pending_cap_snap(ci)) { | 449 | if (__ceph_have_pending_cap_snap(ci)) { |
| 449 | /* there is no point in queuing multiple "pending" cap_snaps, | 450 | /* there is no point in queuing multiple "pending" cap_snaps, |
| 450 | as no new writes are allowed to start when pending, so any | 451 | as no new writes are allowed to start when pending, so any |
| @@ -452,11 +453,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
| 452 | cap_snap. lucky us. */ | 453 | cap_snap. lucky us. */ |
| 453 | dout("queue_cap_snap %p already pending\n", inode); | 454 | dout("queue_cap_snap %p already pending\n", inode); |
| 454 | kfree(capsnap); | 455 | kfree(capsnap); |
| 455 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR)) { | 456 | } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || |
| 457 | (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| | ||
| 458 | CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { | ||
| 456 | struct ceph_snap_context *snapc = ci->i_head_snapc; | 459 | struct ceph_snap_context *snapc = ci->i_head_snapc; |
| 457 | 460 | ||
| 461 | dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, | ||
| 462 | capsnap, snapc); | ||
| 458 | igrab(inode); | 463 | igrab(inode); |
| 459 | 464 | ||
| 460 | atomic_set(&capsnap->nref, 1); | 465 | atomic_set(&capsnap->nref, 1); |
| 461 | capsnap->ci = ci; | 466 | capsnap->ci = ci; |
| 462 | INIT_LIST_HEAD(&capsnap->ci_item); | 467 | INIT_LIST_HEAD(&capsnap->ci_item); |
| @@ -464,15 +469,21 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
| 464 | 469 | ||
| 465 | capsnap->follows = snapc->seq - 1; | 470 | capsnap->follows = snapc->seq - 1; |
| 466 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 471 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
| 467 | capsnap->dirty = __ceph_caps_dirty(ci); | 472 | capsnap->dirty = dirty; |
| 468 | 473 | ||
| 469 | capsnap->mode = inode->i_mode; | 474 | capsnap->mode = inode->i_mode; |
| 470 | capsnap->uid = inode->i_uid; | 475 | capsnap->uid = inode->i_uid; |
| 471 | capsnap->gid = inode->i_gid; | 476 | capsnap->gid = inode->i_gid; |
| 472 | 477 | ||
| 473 | /* fixme? */ | 478 | if (dirty & CEPH_CAP_XATTR_EXCL) { |
| 474 | capsnap->xattr_blob = NULL; | 479 | __ceph_build_xattrs_blob(ci); |
| 475 | capsnap->xattr_len = 0; | 480 | capsnap->xattr_blob = |
| 481 | ceph_buffer_get(ci->i_xattrs.blob); | ||
| 482 | capsnap->xattr_version = ci->i_xattrs.version; | ||
| 483 | } else { | ||
| 484 | capsnap->xattr_blob = NULL; | ||
| 485 | capsnap->xattr_version = 0; | ||
| 486 | } | ||
| 476 | 487 | ||
| 477 | /* dirty page count moved from _head to this cap_snap; | 488 | /* dirty page count moved from _head to this cap_snap; |
| 478 | all subsequent writes page dirties occur _after_ this | 489 | all subsequent writes page dirties occur _after_ this |
| @@ -480,7 +491,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
| 480 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; | 491 | capsnap->dirty_pages = ci->i_wrbuffer_ref_head; |
| 481 | ci->i_wrbuffer_ref_head = 0; | 492 | ci->i_wrbuffer_ref_head = 0; |
| 482 | capsnap->context = snapc; | 493 | capsnap->context = snapc; |
| 483 | ci->i_head_snapc = NULL; | 494 | ci->i_head_snapc = |
| 495 | ceph_get_snap_context(ci->i_snap_realm->cached_context); | ||
| 496 | dout(" new snapc is %p\n", ci->i_head_snapc); | ||
| 484 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); | 497 | list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); |
| 485 | 498 | ||
| 486 | if (used & CEPH_CAP_FILE_WR) { | 499 | if (used & CEPH_CAP_FILE_WR) { |
| @@ -539,6 +552,41 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
| 539 | return 1; /* caller may want to ceph_flush_snaps */ | 552 | return 1; /* caller may want to ceph_flush_snaps */ |
| 540 | } | 553 | } |
| 541 | 554 | ||
| 555 | /* | ||
| 556 | * Queue cap_snaps for snap writeback for this realm and its children. | ||
| 557 | * Called under snap_rwsem, so realm topology won't change. | ||
| 558 | */ | ||
| 559 | static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | ||
| 560 | { | ||
| 561 | struct ceph_inode_info *ci; | ||
| 562 | struct inode *lastinode = NULL; | ||
| 563 | struct ceph_snap_realm *child; | ||
| 564 | |||
| 565 | dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino); | ||
| 566 | |||
| 567 | spin_lock(&realm->inodes_with_caps_lock); | ||
| 568 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
| 569 | i_snap_realm_item) { | ||
| 570 | struct inode *inode = igrab(&ci->vfs_inode); | ||
| 571 | if (!inode) | ||
| 572 | continue; | ||
| 573 | spin_unlock(&realm->inodes_with_caps_lock); | ||
| 574 | if (lastinode) | ||
| 575 | iput(lastinode); | ||
| 576 | lastinode = inode; | ||
| 577 | ceph_queue_cap_snap(ci); | ||
| 578 | spin_lock(&realm->inodes_with_caps_lock); | ||
| 579 | } | ||
| 580 | spin_unlock(&realm->inodes_with_caps_lock); | ||
| 581 | if (lastinode) | ||
| 582 | iput(lastinode); | ||
| 583 | |||
| 584 | dout("queue_realm_cap_snaps %p %llx children\n", realm, realm->ino); | ||
| 585 | list_for_each_entry(child, &realm->children, child_item) | ||
| 586 | queue_realm_cap_snaps(child); | ||
| 587 | |||
| 588 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); | ||
| 589 | } | ||
| 542 | 590 | ||
| 543 | /* | 591 | /* |
| 544 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies | 592 | * Parse and apply a snapblob "snap trace" from the MDS. This specifies |
| @@ -589,29 +637,8 @@ more: | |||
| 589 | * | 637 | * |
| 590 | * ...unless it's a snap deletion! | 638 | * ...unless it's a snap deletion! |
| 591 | */ | 639 | */ |
| 592 | if (!deletion) { | 640 | if (!deletion) |
| 593 | struct ceph_inode_info *ci; | 641 | queue_realm_cap_snaps(realm); |
| 594 | struct inode *lastinode = NULL; | ||
| 595 | |||
| 596 | spin_lock(&realm->inodes_with_caps_lock); | ||
| 597 | list_for_each_entry(ci, &realm->inodes_with_caps, | ||
| 598 | i_snap_realm_item) { | ||
| 599 | struct inode *inode = igrab(&ci->vfs_inode); | ||
| 600 | if (!inode) | ||
| 601 | continue; | ||
| 602 | spin_unlock(&realm->inodes_with_caps_lock); | ||
| 603 | if (lastinode) | ||
| 604 | iput(lastinode); | ||
| 605 | lastinode = inode; | ||
| 606 | ceph_queue_cap_snap(ci); | ||
| 607 | spin_lock(&realm->inodes_with_caps_lock); | ||
| 608 | } | ||
| 609 | spin_unlock(&realm->inodes_with_caps_lock); | ||
| 610 | if (lastinode) | ||
| 611 | iput(lastinode); | ||
| 612 | dout("update_snap_trace cap_snaps queued\n"); | ||
| 613 | } | ||
| 614 | |||
| 615 | } else { | 642 | } else { |
| 616 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | 643 | dout("update_snap_trace %llx %p seq %lld unchanged\n", |
| 617 | realm->ino, realm, realm->seq); | 644 | realm->ino, realm, realm->seq); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 2482d696f0de..c33897ae5725 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -216,8 +216,7 @@ struct ceph_cap_snap { | |||
| 216 | uid_t uid; | 216 | uid_t uid; |
| 217 | gid_t gid; | 217 | gid_t gid; |
| 218 | 218 | ||
| 219 | void *xattr_blob; | 219 | struct ceph_buffer *xattr_blob; |
| 220 | int xattr_len; | ||
| 221 | u64 xattr_version; | 220 | u64 xattr_version; |
| 222 | 221 | ||
| 223 | u64 size; | 222 | u64 size; |
| @@ -229,8 +228,11 @@ struct ceph_cap_snap { | |||
| 229 | 228 | ||
| 230 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) | 229 | static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) |
| 231 | { | 230 | { |
| 232 | if (atomic_dec_and_test(&capsnap->nref)) | 231 | if (atomic_dec_and_test(&capsnap->nref)) { |
| 232 | if (capsnap->xattr_blob) | ||
| 233 | ceph_buffer_put(capsnap->xattr_blob); | ||
| 233 | kfree(capsnap); | 234 | kfree(capsnap); |
| 235 | } | ||
| 234 | } | 236 | } |
| 235 | 237 | ||
| 236 | /* | 238 | /* |
| @@ -342,7 +344,8 @@ struct ceph_inode_info { | |||
| 342 | unsigned i_cap_exporting_issued; | 344 | unsigned i_cap_exporting_issued; |
| 343 | struct ceph_cap_reservation i_cap_migration_resv; | 345 | struct ceph_cap_reservation i_cap_migration_resv; |
| 344 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ | 346 | struct list_head i_cap_snaps; /* snapped state pending flush to mds */ |
| 345 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 */ | 347 | struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or |
| 348 | dirty|flushing caps */ | ||
| 346 | unsigned i_snap_caps; /* cap bits for snapped files */ | 349 | unsigned i_snap_caps; /* cap bits for snapped files */ |
| 347 | 350 | ||
| 348 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ | 351 | int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 097a2654c00f..9578af610b73 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
| @@ -485,6 +485,7 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci) | |||
| 485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; | 485 | ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob; |
| 486 | ci->i_xattrs.prealloc_blob = NULL; | 486 | ci->i_xattrs.prealloc_blob = NULL; |
| 487 | ci->i_xattrs.dirty = false; | 487 | ci->i_xattrs.dirty = false; |
| 488 | ci->i_xattrs.version++; | ||
| 488 | } | 489 | } |
| 489 | } | 490 | } |
| 490 | 491 | ||
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a803f5e33471..e3bccac1f025 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
| @@ -1126,6 +1126,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) | |||
| 1126 | task_io_account_write(PAGE_CACHE_SIZE); | 1126 | task_io_account_write(PAGE_CACHE_SIZE); |
| 1127 | } | 1127 | } |
| 1128 | } | 1128 | } |
| 1129 | EXPORT_SYMBOL(account_page_dirtied); | ||
| 1129 | 1130 | ||
| 1130 | /* | 1131 | /* |
| 1131 | * For address_spaces which do not use buffers. Just tag the page as dirty in | 1132 | * For address_spaces which do not use buffers. Just tag the page as dirty in |
