aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-09-21 14:20:10 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-09-21 14:20:10 -0400
commit 19746cad00a00a7a2e3eb0640d317d6e7c2e8cc0 (patch)
tree 3dfd66dce28b3b02f7e293b28b57191ca23f691c
parent 0ffe37de76a57ba38d960e370c2f8f1d799c94a1 (diff)
parent be4f104dfd3b5e3ae262bff607965cfc38027dec (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: select CRYPTO
  ceph: check mapping to determine if FILE_CACHE cap is used
  ceph: only send one flushsnap per cap_snap per mds session
  ceph: fix cap_snap and realm split
  ceph: stop sending FLUSHSNAPs when we hit a dirty capsnap
  ceph: correctly set 'follows' in flushsnap messages
  ceph: fix dn offset during readdir_prepopulate
  ceph: fix file offset wrapping at 4GB on 32-bit archs
  ceph: fix reconnect encoding for old servers
  ceph: fix pagelist kunmap tail
  ceph: fix null pointer deref on anon root dentry release
-rw-r--r-- fs/ceph/Kconfig 1
-rw-r--r-- fs/ceph/addr.c 7
-rw-r--r-- fs/ceph/caps.c 27
-rw-r--r-- fs/ceph/dir.c 10
-rw-r--r-- fs/ceph/inode.c 11
-rw-r--r-- fs/ceph/mds_client.c 2
-rw-r--r-- fs/ceph/pagelist.c 12
-rw-r--r-- fs/ceph/snap.c 92
-rw-r--r-- fs/ceph/super.h 5
9 files changed, 84 insertions, 83 deletions
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig
index bc87b9c1d27e..0fcd2640c23f 100644
--- a/fs/ceph/Kconfig
+++ b/fs/ceph/Kconfig
@@ -3,6 +3,7 @@ config CEPH_FS
3 depends on INET && EXPERIMENTAL 3 depends on INET && EXPERIMENTAL
4 select LIBCRC32C 4 select LIBCRC32C
5 select CRYPTO_AES 5 select CRYPTO_AES
6 select CRYPTO
6 help 7 help
7 Choose Y or M here to include support for mounting the 8 Choose Y or M here to include support for mounting the
8 experimental Ceph distributed file system. Ceph is an extremely 9 experimental Ceph distributed file system. Ceph is an extremely
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4cfce1ee31fa..efbc604001c8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -411,8 +411,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
411 if (i_size < page_off + len) 411 if (i_size < page_off + len)
412 len = i_size - page_off; 412 len = i_size - page_off;
413 413
414 dout("writepage %p page %p index %lu on %llu~%u\n", 414 dout("writepage %p page %p index %lu on %llu~%u snapc %p\n",
415 inode, page, page->index, page_off, len); 415 inode, page, page->index, page_off, len, snapc);
416 416
417 writeback_stat = atomic_long_inc_return(&client->writeback_count); 417 writeback_stat = atomic_long_inc_return(&client->writeback_count);
418 if (writeback_stat > 418 if (writeback_stat >
@@ -766,7 +766,8 @@ get_more_pages:
766 /* ok */ 766 /* ok */
767 if (locked_pages == 0) { 767 if (locked_pages == 0) {
768 /* prepare async write request */ 768 /* prepare async write request */
769 offset = page->index << PAGE_CACHE_SHIFT; 769 offset = (unsigned long long)page->index
770 << PAGE_CACHE_SHIFT;
770 len = wsize; 771 len = wsize;
771 req = ceph_osdc_new_request(&client->osdc, 772 req = ceph_osdc_new_request(&client->osdc,
772 &ci->i_layout, 773 &ci->i_layout,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index a2069b6680ae..73c153092f72 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -814,7 +814,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci)
814 used |= CEPH_CAP_PIN; 814 used |= CEPH_CAP_PIN;
815 if (ci->i_rd_ref) 815 if (ci->i_rd_ref)
816 used |= CEPH_CAP_FILE_RD; 816 used |= CEPH_CAP_FILE_RD;
817 if (ci->i_rdcache_ref || ci->i_rdcache_gen) 817 if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages)
818 used |= CEPH_CAP_FILE_CACHE; 818 used |= CEPH_CAP_FILE_CACHE;
819 if (ci->i_wr_ref) 819 if (ci->i_wr_ref)
820 used |= CEPH_CAP_FILE_WR; 820 used |= CEPH_CAP_FILE_WR;
@@ -1195,10 +1195,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1195 * asynchronously back to the MDS once sync writes complete and dirty 1195 * asynchronously back to the MDS once sync writes complete and dirty
1196 * data is written out. 1196 * data is written out.
1197 * 1197 *
1198 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session).
1200 *
1198 * Called under i_lock. Takes s_mutex as needed. 1201 * Called under i_lock. Takes s_mutex as needed.
1199 */ 1202 */
1200void __ceph_flush_snaps(struct ceph_inode_info *ci, 1203void __ceph_flush_snaps(struct ceph_inode_info *ci,
1201 struct ceph_mds_session **psession) 1204 struct ceph_mds_session **psession,
1205 int again)
1202 __releases(ci->vfs_inode->i_lock) 1206 __releases(ci->vfs_inode->i_lock)
1203 __acquires(ci->vfs_inode->i_lock) 1207 __acquires(ci->vfs_inode->i_lock)
1204{ 1208{
@@ -1227,7 +1231,7 @@ retry:
1227 * pages to be written out. 1231 * pages to be written out.
1228 */ 1232 */
1229 if (capsnap->dirty_pages || capsnap->writing) 1233 if (capsnap->dirty_pages || capsnap->writing)
1230 continue; 1234 break;
1231 1235
1232 /* 1236 /*
1233 * if cap writeback already occurred, we should have dropped 1237 * if cap writeback already occurred, we should have dropped
@@ -1240,6 +1244,13 @@ retry:
1240 dout("no auth cap (migrating?), doing nothing\n"); 1244 dout("no auth cap (migrating?), doing nothing\n");
1241 goto out; 1245 goto out;
1242 } 1246 }
1247
1248 /* only flush each capsnap once */
1249 if (!again && !list_empty(&capsnap->flushing_item)) {
1250 dout("already flushed %p, skipping\n", capsnap);
1251 continue;
1252 }
1253
1243 mds = ci->i_auth_cap->session->s_mds; 1254 mds = ci->i_auth_cap->session->s_mds;
1244 mseq = ci->i_auth_cap->mseq; 1255 mseq = ci->i_auth_cap->mseq;
1245 1256
@@ -1276,8 +1287,8 @@ retry:
1276 &session->s_cap_snaps_flushing); 1287 &session->s_cap_snaps_flushing);
1277 spin_unlock(&inode->i_lock); 1288 spin_unlock(&inode->i_lock);
1278 1289
1279 dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", 1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1280 inode, capsnap, next_follows, capsnap->size); 1291 inode, capsnap, capsnap->follows, capsnap->flush_tid);
1281 send_cap_msg(session, ceph_vino(inode).ino, 0, 1292 send_cap_msg(session, ceph_vino(inode).ino, 0,
1282 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, 1293 CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0,
1283 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, 1294 capsnap->dirty, 0, capsnap->flush_tid, 0, mseq,
@@ -1314,7 +1325,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci)
1314 struct inode *inode = &ci->vfs_inode; 1325 struct inode *inode = &ci->vfs_inode;
1315 1326
1316 spin_lock(&inode->i_lock); 1327 spin_lock(&inode->i_lock);
1317 __ceph_flush_snaps(ci, NULL); 1328 __ceph_flush_snaps(ci, NULL, 0);
1318 spin_unlock(&inode->i_lock); 1329 spin_unlock(&inode->i_lock);
1319} 1330}
1320 1331
@@ -1477,7 +1488,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1477 1488
1478 /* flush snaps first time around only */ 1489 /* flush snaps first time around only */
1479 if (!list_empty(&ci->i_cap_snaps)) 1490 if (!list_empty(&ci->i_cap_snaps))
1480 __ceph_flush_snaps(ci, &session); 1491 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1492 goto retry_locked;
1482retry: 1493retry:
1483 spin_lock(&inode->i_lock); 1494 spin_lock(&inode->i_lock);
@@ -1894,7 +1905,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 if (cap && cap->session == session) { 1905 if (cap && cap->session == session) {
1895 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1906 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
1896 cap, capsnap); 1907 cap, capsnap);
1897 __ceph_flush_snaps(ci, &session); 1908 __ceph_flush_snaps(ci, &session, 1);
1898 } else { 1909 } else {
1899 pr_err("%p auth cap %p not mds%d ???\n", inode, 1910 pr_err("%p auth cap %p not mds%d ???\n", inode,
1900 cap, session->s_mds); 1911 cap, session->s_mds);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 6e4f43ff23ec..a1986eb52045 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1021,11 +1021,15 @@ out_touch:
1021static void ceph_dentry_release(struct dentry *dentry) 1021static void ceph_dentry_release(struct dentry *dentry)
1022{ 1022{
1023 struct ceph_dentry_info *di = ceph_dentry(dentry); 1023 struct ceph_dentry_info *di = ceph_dentry(dentry);
1024 struct inode *parent_inode = dentry->d_parent->d_inode; 1024 struct inode *parent_inode = NULL;
1025 u64 snapid = ceph_snap(parent_inode); 1025 u64 snapid = CEPH_NOSNAP;
1026 1026
1027 if (!IS_ROOT(dentry)) {
1028 parent_inode = dentry->d_parent->d_inode;
1029 if (parent_inode)
1030 snapid = ceph_snap(parent_inode);
1031 }
1027 dout("dentry_release %p parent %p\n", dentry, parent_inode); 1032 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1028
1029 if (parent_inode && snapid != CEPH_SNAPDIR) { 1033 if (parent_inode && snapid != CEPH_SNAPDIR) {
1030 struct ceph_inode_info *ci = ceph_inode(parent_inode); 1034 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1031 1035
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index e7cca414da03..62377ec37edf 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -845,7 +845,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
845 * the caller) if we fail. 845 * the caller) if we fail.
846 */ 846 */
847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 847static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
848 bool *prehash) 848 bool *prehash, bool set_offset)
849{ 849{
850 struct dentry *realdn; 850 struct dentry *realdn;
851 851
@@ -877,7 +877,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
877 } 877 }
878 if ((!prehash || *prehash) && d_unhashed(dn)) 878 if ((!prehash || *prehash) && d_unhashed(dn))
879 d_rehash(dn); 879 d_rehash(dn);
880 ceph_set_dentry_offset(dn); 880 if (set_offset)
881 ceph_set_dentry_offset(dn);
881out: 882out:
882 return dn; 883 return dn;
883} 884}
@@ -1062,7 +1063,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1062 d_delete(dn); 1063 d_delete(dn);
1063 goto done; 1064 goto done;
1064 } 1065 }
1065 dn = splice_dentry(dn, in, &have_lease); 1066 dn = splice_dentry(dn, in, &have_lease, true);
1066 if (IS_ERR(dn)) { 1067 if (IS_ERR(dn)) {
1067 err = PTR_ERR(dn); 1068 err = PTR_ERR(dn);
1068 goto done; 1069 goto done;
@@ -1105,7 +1106,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1105 goto done; 1106 goto done;
1106 } 1107 }
1107 dout(" linking snapped dir %p to dn %p\n", in, dn); 1108 dout(" linking snapped dir %p to dn %p\n", in, dn);
1108 dn = splice_dentry(dn, in, NULL); 1109 dn = splice_dentry(dn, in, NULL, true);
1109 if (IS_ERR(dn)) { 1110 if (IS_ERR(dn)) {
1110 err = PTR_ERR(dn); 1111 err = PTR_ERR(dn);
1111 goto done; 1112 goto done;
@@ -1237,7 +1238,7 @@ retry_lookup:
1237 err = PTR_ERR(in); 1238 err = PTR_ERR(in);
1238 goto out; 1239 goto out;
1239 } 1240 }
1240 dn = splice_dentry(dn, in, NULL); 1241 dn = splice_dentry(dn, in, NULL, false);
1241 if (IS_ERR(dn)) 1242 if (IS_ERR(dn))
1242 dn = NULL; 1243 dn = NULL;
1243 } 1244 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index f091b1351786..fad95f8f2608 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2374,6 +2374,8 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2374 num_fcntl_locks, 2374 num_fcntl_locks,
2375 num_flock_locks); 2375 num_flock_locks);
2376 unlock_kernel(); 2376 unlock_kernel();
2377 } else {
2378 err = ceph_pagelist_append(pagelist, &rec, reclen);
2377 } 2379 }
2378 2380
2379out_free: 2381out_free:
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c
index b6859f47d364..46a368b6dce5 100644
--- a/fs/ceph/pagelist.c
+++ b/fs/ceph/pagelist.c
@@ -5,10 +5,18 @@
5 5
6#include "pagelist.h" 6#include "pagelist.h"
7 7
8static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
9{
10 struct page *page = list_entry(pl->head.prev, struct page,
11 lru);
12 kunmap(page);
13}
14
8int ceph_pagelist_release(struct ceph_pagelist *pl) 15int ceph_pagelist_release(struct ceph_pagelist *pl)
9{ 16{
10 if (pl->mapped_tail) 17 if (pl->mapped_tail)
11 kunmap(pl->mapped_tail); 18 ceph_pagelist_unmap_tail(pl);
19
12 while (!list_empty(&pl->head)) { 20 while (!list_empty(&pl->head)) {
13 struct page *page = list_first_entry(&pl->head, struct page, 21 struct page *page = list_first_entry(&pl->head, struct page,
14 lru); 22 lru);
@@ -26,7 +34,7 @@ static int ceph_pagelist_addpage(struct ceph_pagelist *pl)
26 pl->room += PAGE_SIZE; 34 pl->room += PAGE_SIZE;
27 list_add_tail(&page->lru, &pl->head); 35 list_add_tail(&page->lru, &pl->head);
28 if (pl->mapped_tail) 36 if (pl->mapped_tail)
29 kunmap(pl->mapped_tail); 37 ceph_pagelist_unmap_tail(pl);
30 pl->mapped_tail = kmap(page); 38 pl->mapped_tail = kmap(page);
31 return 0; 39 return 0;
32} 40}
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4868b9dcac5a..190b6c4a6f2b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -119,6 +119,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
119 INIT_LIST_HEAD(&realm->children); 119 INIT_LIST_HEAD(&realm->children);
120 INIT_LIST_HEAD(&realm->child_item); 120 INIT_LIST_HEAD(&realm->child_item);
121 INIT_LIST_HEAD(&realm->empty_item); 121 INIT_LIST_HEAD(&realm->empty_item);
122 INIT_LIST_HEAD(&realm->dirty_item);
122 INIT_LIST_HEAD(&realm->inodes_with_caps); 123 INIT_LIST_HEAD(&realm->inodes_with_caps);
123 spin_lock_init(&realm->inodes_with_caps_lock); 124 spin_lock_init(&realm->inodes_with_caps_lock);
124 __insert_snap_realm(&mdsc->snap_realms, realm); 125 __insert_snap_realm(&mdsc->snap_realms, realm);
@@ -467,7 +468,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
467 INIT_LIST_HEAD(&capsnap->ci_item); 468 INIT_LIST_HEAD(&capsnap->ci_item);
468 INIT_LIST_HEAD(&capsnap->flushing_item); 469 INIT_LIST_HEAD(&capsnap->flushing_item);
469 470
470 capsnap->follows = snapc->seq - 1; 471 capsnap->follows = snapc->seq;
471 capsnap->issued = __ceph_caps_issued(ci, NULL); 472 capsnap->issued = __ceph_caps_issued(ci, NULL);
472 capsnap->dirty = dirty; 473 capsnap->dirty = dirty;
473 474
@@ -604,6 +605,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc,
604 struct ceph_snap_realm *realm; 605 struct ceph_snap_realm *realm;
605 int invalidate = 0; 606 int invalidate = 0;
606 int err = -ENOMEM; 607 int err = -ENOMEM;
608 LIST_HEAD(dirty_realms);
607 609
608 dout("update_snap_trace deletion=%d\n", deletion); 610 dout("update_snap_trace deletion=%d\n", deletion);
609more: 611more:
@@ -626,24 +628,6 @@ more:
626 } 628 }
627 } 629 }
628 630
629 if (le64_to_cpu(ri->seq) > realm->seq) {
630 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
631 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
632 /*
633 * if the realm seq has changed, queue a cap_snap for every
634 * inode with open caps. we do this _before_ we update
635 * the realm info so that we prepare for writeback under the
636 * _previous_ snap context.
637 *
638 * ...unless it's a snap deletion!
639 */
640 if (!deletion)
641 queue_realm_cap_snaps(realm);
642 } else {
643 dout("update_snap_trace %llx %p seq %lld unchanged\n",
644 realm->ino, realm, realm->seq);
645 }
646
647 /* ensure the parent is correct */ 631 /* ensure the parent is correct */
648 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); 632 err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent));
649 if (err < 0) 633 if (err < 0)
@@ -651,6 +635,8 @@ more:
651 invalidate += err; 635 invalidate += err;
652 636
653 if (le64_to_cpu(ri->seq) > realm->seq) { 637 if (le64_to_cpu(ri->seq) > realm->seq) {
638 dout("update_snap_trace updating %llx %p %lld -> %lld\n",
639 realm->ino, realm, realm->seq, le64_to_cpu(ri->seq));
654 /* update realm parameters, snap lists */ 640 /* update realm parameters, snap lists */
655 realm->seq = le64_to_cpu(ri->seq); 641 realm->seq = le64_to_cpu(ri->seq);
656 realm->created = le64_to_cpu(ri->created); 642 realm->created = le64_to_cpu(ri->created);
@@ -668,9 +654,17 @@ more:
668 if (err < 0) 654 if (err < 0)
669 goto fail; 655 goto fail;
670 656
657 /* queue realm for cap_snap creation */
658 list_add(&realm->dirty_item, &dirty_realms);
659
671 invalidate = 1; 660 invalidate = 1;
672 } else if (!realm->cached_context) { 661 } else if (!realm->cached_context) {
662 dout("update_snap_trace %llx %p seq %lld new\n",
663 realm->ino, realm, realm->seq);
673 invalidate = 1; 664 invalidate = 1;
665 } else {
666 dout("update_snap_trace %llx %p seq %lld unchanged\n",
667 realm->ino, realm, realm->seq);
674 } 668 }
675 669
676 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, 670 dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino,
@@ -683,6 +677,14 @@ more:
683 if (invalidate) 677 if (invalidate)
684 rebuild_snap_realms(realm); 678 rebuild_snap_realms(realm);
685 679
680 /*
681 * queue cap snaps _after_ we've built the new snap contexts,
682 * so that i_head_snapc can be set appropriately.
683 */
684 list_for_each_entry(realm, &dirty_realms, dirty_item) {
685 queue_realm_cap_snaps(realm);
686 }
687
686 __cleanup_empty_realms(mdsc); 688 __cleanup_empty_realms(mdsc);
687 return 0; 689 return 0;
688 690
@@ -715,7 +717,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
715 igrab(inode); 717 igrab(inode);
716 spin_unlock(&mdsc->snap_flush_lock); 718 spin_unlock(&mdsc->snap_flush_lock);
717 spin_lock(&inode->i_lock); 719 spin_lock(&inode->i_lock);
718 __ceph_flush_snaps(ci, &session); 720 __ceph_flush_snaps(ci, &session, 0);
719 spin_unlock(&inode->i_lock); 721 spin_unlock(&inode->i_lock);
720 iput(inode); 722 iput(inode);
721 spin_lock(&mdsc->snap_flush_lock); 723 spin_lock(&mdsc->snap_flush_lock);
@@ -816,6 +818,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
816 }; 818 };
817 struct inode *inode = ceph_find_inode(sb, vino); 819 struct inode *inode = ceph_find_inode(sb, vino);
818 struct ceph_inode_info *ci; 820 struct ceph_inode_info *ci;
821 struct ceph_snap_realm *oldrealm;
819 822
820 if (!inode) 823 if (!inode)
821 continue; 824 continue;
@@ -841,18 +844,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
841 dout(" will move %p to split realm %llx %p\n", 844 dout(" will move %p to split realm %llx %p\n",
842 inode, realm->ino, realm); 845 inode, realm->ino, realm);
843 /* 846 /*
844 * Remove the inode from the realm's inode 847 * Move the inode to the new realm
845 * list, but don't add it to the new realm
846 * yet. We don't want the cap_snap to be
847 * queued (again) by ceph_update_snap_trace()
848 * below. Queue it _now_, under the old context.
849 */ 848 */
850 spin_lock(&realm->inodes_with_caps_lock); 849 spin_lock(&realm->inodes_with_caps_lock);
851 list_del_init(&ci->i_snap_realm_item); 850 list_del_init(&ci->i_snap_realm_item);
851 list_add(&ci->i_snap_realm_item,
852 &realm->inodes_with_caps);
853 oldrealm = ci->i_snap_realm;
854 ci->i_snap_realm = realm;
852 spin_unlock(&realm->inodes_with_caps_lock); 855 spin_unlock(&realm->inodes_with_caps_lock);
853 spin_unlock(&inode->i_lock); 856 spin_unlock(&inode->i_lock);
854 857
855 ceph_queue_cap_snap(ci); 858 ceph_get_snap_realm(mdsc, realm);
859 ceph_put_snap_realm(mdsc, oldrealm);
856 860
857 iput(inode); 861 iput(inode);
858 continue; 862 continue;
@@ -880,43 +884,9 @@ skip_inode:
880 ceph_update_snap_trace(mdsc, p, e, 884 ceph_update_snap_trace(mdsc, p, e,
881 op == CEPH_SNAP_OP_DESTROY); 885 op == CEPH_SNAP_OP_DESTROY);
882 886
883 if (op == CEPH_SNAP_OP_SPLIT) { 887 if (op == CEPH_SNAP_OP_SPLIT)
884 /*
885 * ok, _now_ add the inodes into the new realm.
886 */
887 for (i = 0; i < num_split_inos; i++) {
888 struct ceph_vino vino = {
889 .ino = le64_to_cpu(split_inos[i]),
890 .snap = CEPH_NOSNAP,
891 };
892 struct inode *inode = ceph_find_inode(sb, vino);
893 struct ceph_inode_info *ci;
894
895 if (!inode)
896 continue;
897 ci = ceph_inode(inode);
898 spin_lock(&inode->i_lock);
899 if (list_empty(&ci->i_snap_realm_item)) {
900 struct ceph_snap_realm *oldrealm =
901 ci->i_snap_realm;
902
903 dout(" moving %p to split realm %llx %p\n",
904 inode, realm->ino, realm);
905 spin_lock(&realm->inodes_with_caps_lock);
906 list_add(&ci->i_snap_realm_item,
907 &realm->inodes_with_caps);
908 ci->i_snap_realm = realm;
909 spin_unlock(&realm->inodes_with_caps_lock);
910 ceph_get_snap_realm(mdsc, realm);
911 ceph_put_snap_realm(mdsc, oldrealm);
912 }
913 spin_unlock(&inode->i_lock);
914 iput(inode);
915 }
916
917 /* we took a reference when we created the realm, above */ 888 /* we took a reference when we created the realm, above */
918 ceph_put_snap_realm(mdsc, realm); 889 ceph_put_snap_realm(mdsc, realm);
919 }
920 890
921 __cleanup_empty_realms(mdsc); 891 __cleanup_empty_realms(mdsc);
922 892
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index c33897ae5725..b87638e84c4b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -690,6 +690,8 @@ struct ceph_snap_realm {
690 690
691 struct list_head empty_item; /* if i have ref==0 */ 691 struct list_head empty_item; /* if i have ref==0 */
692 692
693 struct list_head dirty_item; /* if realm needs new context */
694
693 /* the current set of snaps for this realm */ 695 /* the current set of snaps for this realm */
694 struct ceph_snap_context *cached_context; 696 struct ceph_snap_context *cached_context;
695 697
@@ -826,7 +828,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
826extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, 828extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
827 struct ceph_snap_context *snapc); 829 struct ceph_snap_context *snapc);
828extern void __ceph_flush_snaps(struct ceph_inode_info *ci, 830extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
829 struct ceph_mds_session **psession); 831 struct ceph_mds_session **psession,
832 int again);
830extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, 833extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
831 struct ceph_mds_session *session); 834 struct ceph_mds_session *session);
832extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); 835extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);