aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSage Weil <sage@newdream.net>2011-11-30 12:47:09 -0500
committerSage Weil <sage@newdream.net>2011-12-07 13:46:44 -0500
commitbe655596b3de5873f994ddbe205751a5ffb4de39 (patch)
treecf6e53c6344f87a47ac68d6a6ff4d7dac6e5c25e
parent51703306b3b9ea7c05728040998521e47358147b (diff)
ceph: use i_ceph_lock instead of i_lock
We have been using i_lock to protect all kinds of data structures in the ceph_inode_info struct, including lists of inodes that we need to iterate over while avoiding races with inode destruction. That requires grabbing a reference to the inode with the list lock protected, but igrab() now takes i_lock to check the inode flags. Changing the list lock ordering would be a painful process. However, using a ceph-specific i_ceph_lock in the ceph inode instead of i_lock is a simple mechanical change and avoids the ordering constraints imposed by igrab(). Reported-by: Amon Ott <a.ott@m-privacy.de> Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/caps.c187
-rw-r--r--fs/ceph/dir.c24
-rw-r--r--fs/ceph/file.c20
-rw-r--r--fs/ceph/inode.c53
-rw-r--r--fs/ceph/ioctl.c4
-rw-r--r--fs/ceph/mds_client.c32
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/snap.c16
-rw-r--r--fs/ceph/super.h31
-rw-r--r--fs/ceph/xattr.c42
11 files changed, 212 insertions, 207 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4144caf2f9d3..173b1d22e59b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
88 88
89 /* dirty the head */ 89 /* dirty the head */
90 spin_lock(&inode->i_lock); 90 spin_lock(&ci->i_ceph_lock);
91 if (ci->i_head_snapc == NULL) 91 if (ci->i_head_snapc == NULL)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, 100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, 101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
102 snapc, snapc->seq, snapc->num_snaps); 102 snapc, snapc->seq, snapc->num_snaps);
103 spin_unlock(&inode->i_lock); 103 spin_unlock(&ci->i_ceph_lock);
104 104
105 /* now adjust page */ 105 /* now adjust page */
106 spin_lock_irq(&mapping->tree_lock); 106 spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
391 struct ceph_snap_context *snapc = NULL; 391 struct ceph_snap_context *snapc = NULL;
392 struct ceph_cap_snap *capsnap = NULL; 392 struct ceph_cap_snap *capsnap = NULL;
393 393
394 spin_lock(&inode->i_lock); 394 spin_lock(&ci->i_ceph_lock);
395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
397 capsnap->context, capsnap->dirty_pages); 397 capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
407 dout(" head snapc %p has %d dirty pages\n", 407 dout(" head snapc %p has %d dirty pages\n",
408 snapc, ci->i_wrbuffer_ref_head); 408 snapc, ci->i_wrbuffer_ref_head);
409 } 409 }
410 spin_unlock(&inode->i_lock); 410 spin_unlock(&ci->i_ceph_lock);
411 return snapc; 411 return snapc;
412} 412}
413 413
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0f327c6c9679..8b53193e4f7c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
309/* 309/*
310 * Find ceph_cap for given mds, if any. 310 * Find ceph_cap for given mds, if any.
311 * 311 *
312 * Called with i_lock held. 312 * Called with i_ceph_lock held.
313 */ 313 */
314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
315{ 315{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
332{ 332{
333 struct ceph_cap *cap; 333 struct ceph_cap *cap;
334 334
335 spin_lock(&ci->vfs_inode.i_lock); 335 spin_lock(&ci->i_ceph_lock);
336 cap = __get_cap_for_mds(ci, mds); 336 cap = __get_cap_for_mds(ci, mds);
337 spin_unlock(&ci->vfs_inode.i_lock); 337 spin_unlock(&ci->i_ceph_lock);
338 return cap; 338 return cap;
339} 339}
340 340
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
361 361
362int ceph_get_cap_mds(struct inode *inode) 362int ceph_get_cap_mds(struct inode *inode)
363{ 363{
364 struct ceph_inode_info *ci = ceph_inode(inode);
364 int mds; 365 int mds;
365 spin_lock(&inode->i_lock); 366 spin_lock(&ci->i_ceph_lock);
366 mds = __ceph_get_cap_mds(ceph_inode(inode)); 367 mds = __ceph_get_cap_mds(ceph_inode(inode));
367 spin_unlock(&inode->i_lock); 368 spin_unlock(&ci->i_ceph_lock);
368 return mds; 369 return mds;
369} 370}
370 371
371/* 372/*
372 * Called under i_lock. 373 * Called under i_ceph_lock.
373 */ 374 */
374static void __insert_cap_node(struct ceph_inode_info *ci, 375static void __insert_cap_node(struct ceph_inode_info *ci,
375 struct ceph_cap *new) 376 struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
415 * 416 *
416 * If I_FLUSH is set, leave the inode at the front of the list. 417 * If I_FLUSH is set, leave the inode at the front of the list.
417 * 418 *
418 * Caller holds i_lock 419 * Caller holds i_ceph_lock
419 * -> we take mdsc->cap_delay_lock 420 * -> we take mdsc->cap_delay_lock
420 */ 421 */
421static void __cap_delay_requeue(struct ceph_mds_client *mdsc, 422static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
457/* 458/*
458 * Cancel delayed work on cap. 459 * Cancel delayed work on cap.
459 * 460 *
460 * Caller must hold i_lock. 461 * Caller must hold i_ceph_lock.
461 */ 462 */
462static void __cap_delay_cancel(struct ceph_mds_client *mdsc, 463static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
463 struct ceph_inode_info *ci) 464 struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
532 wanted |= ceph_caps_for_mode(fmode); 533 wanted |= ceph_caps_for_mode(fmode);
533 534
534retry: 535retry:
535 spin_lock(&inode->i_lock); 536 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 537 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 538 if (!cap) {
538 if (new_cap) { 539 if (new_cap) {
539 cap = new_cap; 540 cap = new_cap;
540 new_cap = NULL; 541 new_cap = NULL;
541 } else { 542 } else {
542 spin_unlock(&inode->i_lock); 543 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation); 544 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL) 545 if (new_cap == NULL)
545 return -ENOMEM; 546 return -ENOMEM;
@@ -625,7 +626,7 @@ retry:
625 626
626 if (fmode >= 0) 627 if (fmode >= 0)
627 __ceph_get_fmode(ci, fmode); 628 __ceph_get_fmode(ci, fmode);
628 spin_unlock(&inode->i_lock); 629 spin_unlock(&ci->i_ceph_lock);
629 wake_up_all(&ci->i_cap_wq); 630 wake_up_all(&ci->i_cap_wq);
630 return 0; 631 return 0;
631} 632}
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
792 struct rb_node *p; 793 struct rb_node *p;
793 int ret = 0; 794 int ret = 0;
794 795
795 spin_lock(&inode->i_lock); 796 spin_lock(&ci->i_ceph_lock);
796 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 797 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
797 cap = rb_entry(p, struct ceph_cap, ci_node); 798 cap = rb_entry(p, struct ceph_cap, ci_node);
798 if (__cap_is_valid(cap) && 799 if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
801 break; 802 break;
802 } 803 }
803 } 804 }
804 spin_unlock(&inode->i_lock); 805 spin_unlock(&ci->i_ceph_lock);
805 dout("ceph_caps_revoking %p %s = %d\n", inode, 806 dout("ceph_caps_revoking %p %s = %d\n", inode,
806 ceph_cap_string(mask), ret); 807 ceph_cap_string(mask), ret);
807 return ret; 808 return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
855} 856}
856 857
857/* 858/*
858 * called under i_lock 859 * called under i_ceph_lock
859 */ 860 */
860static int __ceph_is_any_caps(struct ceph_inode_info *ci) 861static int __ceph_is_any_caps(struct ceph_inode_info *ci)
861{ 862{
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
865/* 866/*
866 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 867 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
867 * 868 *
868 * caller should hold i_lock. 869 * caller should hold i_ceph_lock.
869 * caller will not hold session s_mutex if called from destroy_inode. 870 * caller will not hold session s_mutex if called from destroy_inode.
870 */ 871 */
871void __ceph_remove_cap(struct ceph_cap *cap) 872void __ceph_remove_cap(struct ceph_cap *cap)
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
1028 1029
1029/* 1030/*
1030 * Queue cap releases when an inode is dropped from our cache. Since 1031 * Queue cap releases when an inode is dropped from our cache. Since
1031 * inode is about to be destroyed, there is no need for i_lock. 1032 * inode is about to be destroyed, there is no need for i_ceph_lock.
1032 */ 1033 */
1033void ceph_queue_caps_release(struct inode *inode) 1034void ceph_queue_caps_release(struct inode *inode)
1034{ 1035{
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
1049 1050
1050/* 1051/*
1051 * Send a cap msg on the given inode. Update our caps state, then 1052 * Send a cap msg on the given inode. Update our caps state, then
1052 * drop i_lock and send the message. 1053 * drop i_ceph_lock and send the message.
1053 * 1054 *
1054 * Make note of max_size reported/requested from mds, revoked caps 1055 * Make note of max_size reported/requested from mds, revoked caps
1055 * that have now been implemented. 1056 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
1061 * Return non-zero if delayed release, or we experienced an error 1062 * Return non-zero if delayed release, or we experienced an error
1062 * such that the caller should requeue + retry later. 1063 * such that the caller should requeue + retry later.
1063 * 1064 *
1064 * called with i_lock, then drops it. 1065 * called with i_ceph_lock, then drops it.
1065 * caller should hold snap_rwsem (read), s_mutex. 1066 * caller should hold snap_rwsem (read), s_mutex.
1066 */ 1067 */
1067static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, 1068static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1068 int op, int used, int want, int retain, int flushing, 1069 int op, int used, int want, int retain, int flushing,
1069 unsigned *pflush_tid) 1070 unsigned *pflush_tid)
1070 __releases(cap->ci->vfs_inode->i_lock) 1071 __releases(cap->ci->i_ceph_lock)
1071{ 1072{
1072 struct ceph_inode_info *ci = cap->ci; 1073 struct ceph_inode_info *ci = cap->ci;
1073 struct inode *inode = &ci->vfs_inode; 1074 struct inode *inode = &ci->vfs_inode;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 xattr_version = ci->i_xattrs.version; 1171 xattr_version = ci->i_xattrs.version;
1171 } 1172 }
1172 1173
1173 spin_unlock(&inode->i_lock); 1174 spin_unlock(&ci->i_ceph_lock);
1174 1175
1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1176 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1177 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1198 * Unless @again is true, skip cap_snaps that were already sent to 1199 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session). 1200 * the MDS (i.e., during this session).
1200 * 1201 *
1201 * Called under i_lock. Takes s_mutex as needed. 1202 * Called under i_ceph_lock. Takes s_mutex as needed.
1202 */ 1203 */
1203void __ceph_flush_snaps(struct ceph_inode_info *ci, 1204void __ceph_flush_snaps(struct ceph_inode_info *ci,
1204 struct ceph_mds_session **psession, 1205 struct ceph_mds_session **psession,
1205 int again) 1206 int again)
1206 __releases(ci->vfs_inode->i_lock) 1207 __releases(ci->i_ceph_lock)
1207 __acquires(ci->vfs_inode->i_lock) 1208 __acquires(ci->i_ceph_lock)
1208{ 1209{
1209 struct inode *inode = &ci->vfs_inode; 1210 struct inode *inode = &ci->vfs_inode;
1210 int mds; 1211 int mds;
@@ -1261,7 +1262,7 @@ retry:
1261 session = NULL; 1262 session = NULL;
1262 } 1263 }
1263 if (!session) { 1264 if (!session) {
1264 spin_unlock(&inode->i_lock); 1265 spin_unlock(&ci->i_ceph_lock);
1265 mutex_lock(&mdsc->mutex); 1266 mutex_lock(&mdsc->mutex);
1266 session = __ceph_lookup_mds_session(mdsc, mds); 1267 session = __ceph_lookup_mds_session(mdsc, mds);
1267 mutex_unlock(&mdsc->mutex); 1268 mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
1275 * deletion or migration. retry, and we'll 1276 * deletion or migration. retry, and we'll
1276 * get a better @mds value next time. 1277 * get a better @mds value next time.
1277 */ 1278 */
1278 spin_lock(&inode->i_lock); 1279 spin_lock(&ci->i_ceph_lock);
1279 goto retry; 1280 goto retry;
1280 } 1281 }
1281 1282
@@ -1285,7 +1286,7 @@ retry:
1285 list_del_init(&capsnap->flushing_item); 1286 list_del_init(&capsnap->flushing_item);
1286 list_add_tail(&capsnap->flushing_item, 1287 list_add_tail(&capsnap->flushing_item,
1287 &session->s_cap_snaps_flushing); 1288 &session->s_cap_snaps_flushing);
1288 spin_unlock(&inode->i_lock); 1289 spin_unlock(&ci->i_ceph_lock);
1289 1290
1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1291 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1291 inode, capsnap, capsnap->follows, capsnap->flush_tid); 1292 inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
1302 next_follows = capsnap->follows + 1; 1303 next_follows = capsnap->follows + 1;
1303 ceph_put_cap_snap(capsnap); 1304 ceph_put_cap_snap(capsnap);
1304 1305
1305 spin_lock(&inode->i_lock); 1306 spin_lock(&ci->i_ceph_lock);
1306 goto retry; 1307 goto retry;
1307 } 1308 }
1308 1309
@@ -1322,11 +1323,9 @@ out:
1322 1323
1323static void ceph_flush_snaps(struct ceph_inode_info *ci) 1324static void ceph_flush_snaps(struct ceph_inode_info *ci)
1324{ 1325{
1325 struct inode *inode = &ci->vfs_inode; 1326 spin_lock(&ci->i_ceph_lock);
1326
1327 spin_lock(&inode->i_lock);
1328 __ceph_flush_snaps(ci, NULL, 0); 1327 __ceph_flush_snaps(ci, NULL, 0);
1329 spin_unlock(&inode->i_lock); 1328 spin_unlock(&ci->i_ceph_lock);
1330} 1329}
1331 1330
1332/* 1331/*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1373 * Add dirty inode to the flushing list. Assigned a seq number so we 1372 * Add dirty inode to the flushing list. Assigned a seq number so we
1374 * can wait for caps to flush without starving. 1373 * can wait for caps to flush without starving.
1375 * 1374 *
1376 * Called under i_lock. 1375 * Called under i_ceph_lock.
1377 */ 1376 */
1378static int __mark_caps_flushing(struct inode *inode, 1377static int __mark_caps_flushing(struct inode *inode,
1379 struct ceph_mds_session *session) 1378 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
1421 struct ceph_inode_info *ci = ceph_inode(inode); 1420 struct ceph_inode_info *ci = ceph_inode(inode);
1422 u32 invalidating_gen = ci->i_rdcache_gen; 1421 u32 invalidating_gen = ci->i_rdcache_gen;
1423 1422
1424 spin_unlock(&inode->i_lock); 1423 spin_unlock(&ci->i_ceph_lock);
1425 invalidate_mapping_pages(&inode->i_data, 0, -1); 1424 invalidate_mapping_pages(&inode->i_data, 0, -1);
1426 spin_lock(&inode->i_lock); 1425 spin_lock(&ci->i_ceph_lock);
1427 1426
1428 if (inode->i_data.nrpages == 0 && 1427 if (inode->i_data.nrpages == 0 &&
1429 invalidating_gen == ci->i_rdcache_gen) { 1428 invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1470 if (mdsc->stopping) 1469 if (mdsc->stopping)
1471 is_delayed = 1; 1470 is_delayed = 1;
1472 1471
1473 spin_lock(&inode->i_lock); 1472 spin_lock(&ci->i_ceph_lock);
1474 1473
1475 if (ci->i_ceph_flags & CEPH_I_FLUSH) 1474 if (ci->i_ceph_flags & CEPH_I_FLUSH)
1476 flags |= CHECK_CAPS_FLUSH; 1475 flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1480 __ceph_flush_snaps(ci, &session, 0); 1479 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1480 goto retry_locked;
1482retry: 1481retry:
1483 spin_lock(&inode->i_lock); 1482 spin_lock(&ci->i_ceph_lock);
1484retry_locked: 1483retry_locked:
1485 file_wanted = __ceph_caps_file_wanted(ci); 1484 file_wanted = __ceph_caps_file_wanted(ci);
1486 used = __ceph_caps_used(ci); 1485 used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
1634 if (mutex_trylock(&session->s_mutex) == 0) { 1633 if (mutex_trylock(&session->s_mutex) == 0) {
1635 dout("inverting session/ino locks on %p\n", 1634 dout("inverting session/ino locks on %p\n",
1636 session); 1635 session);
1637 spin_unlock(&inode->i_lock); 1636 spin_unlock(&ci->i_ceph_lock);
1638 if (took_snap_rwsem) { 1637 if (took_snap_rwsem) {
1639 up_read(&mdsc->snap_rwsem); 1638 up_read(&mdsc->snap_rwsem);
1640 took_snap_rwsem = 0; 1639 took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
1648 if (down_read_trylock(&mdsc->snap_rwsem) == 0) { 1647 if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
1649 dout("inverting snap/in locks on %p\n", 1648 dout("inverting snap/in locks on %p\n",
1650 inode); 1649 inode);
1651 spin_unlock(&inode->i_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1652 down_read(&mdsc->snap_rwsem); 1651 down_read(&mdsc->snap_rwsem);
1653 took_snap_rwsem = 1; 1652 took_snap_rwsem = 1;
1654 goto retry; 1653 goto retry;
@@ -1664,10 +1663,10 @@ ack:
1664 mds = cap->mds; /* remember mds, so we don't repeat */ 1663 mds = cap->mds; /* remember mds, so we don't repeat */
1665 sent++; 1664 sent++;
1666 1665
1667 /* __send_cap drops i_lock */ 1666 /* __send_cap drops i_ceph_lock */
1668 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1667 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
1669 retain, flushing, NULL); 1668 retain, flushing, NULL);
1670 goto retry; /* retake i_lock and restart our cap scan. */ 1669 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1671 } 1670 }
1672 1671
1673 /* 1672 /*
@@ -1681,7 +1680,7 @@ ack:
1681 else if (!is_delayed || force_requeue) 1680 else if (!is_delayed || force_requeue)
1682 __cap_delay_requeue(mdsc, ci); 1681 __cap_delay_requeue(mdsc, ci);
1683 1682
1684 spin_unlock(&inode->i_lock); 1683 spin_unlock(&ci->i_ceph_lock);
1685 1684
1686 if (queue_invalidate) 1685 if (queue_invalidate)
1687 ceph_queue_invalidate(inode); 1686 ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1704 int flushing = 0; 1703 int flushing = 0;
1705 1704
1706retry: 1705retry:
1707 spin_lock(&inode->i_lock); 1706 spin_lock(&ci->i_ceph_lock);
1708 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { 1707 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1709 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); 1708 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1710 goto out; 1709 goto out;
@@ -1716,7 +1715,7 @@ retry:
1716 int delayed; 1715 int delayed;
1717 1716
1718 if (!session) { 1717 if (!session) {
1719 spin_unlock(&inode->i_lock); 1718 spin_unlock(&ci->i_ceph_lock);
1720 session = cap->session; 1719 session = cap->session;
1721 mutex_lock(&session->s_mutex); 1720 mutex_lock(&session->s_mutex);
1722 goto retry; 1721 goto retry;
@@ -1727,18 +1726,18 @@ retry:
1727 1726
1728 flushing = __mark_caps_flushing(inode, session); 1727 flushing = __mark_caps_flushing(inode, session);
1729 1728
1730 /* __send_cap drops i_lock */ 1729 /* __send_cap drops i_ceph_lock */
1731 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, 1730 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
1732 cap->issued | cap->implemented, flushing, 1731 cap->issued | cap->implemented, flushing,
1733 flush_tid); 1732 flush_tid);
1734 if (!delayed) 1733 if (!delayed)
1735 goto out_unlocked; 1734 goto out_unlocked;
1736 1735
1737 spin_lock(&inode->i_lock); 1736 spin_lock(&ci->i_ceph_lock);
1738 __cap_delay_requeue(mdsc, ci); 1737 __cap_delay_requeue(mdsc, ci);
1739 } 1738 }
1740out: 1739out:
1741 spin_unlock(&inode->i_lock); 1740 spin_unlock(&ci->i_ceph_lock);
1742out_unlocked: 1741out_unlocked:
1743 if (session && unlock_session) 1742 if (session && unlock_session)
1744 mutex_unlock(&session->s_mutex); 1743 mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1753 struct ceph_inode_info *ci = ceph_inode(inode); 1752 struct ceph_inode_info *ci = ceph_inode(inode);
1754 int i, ret = 1; 1753 int i, ret = 1;
1755 1754
1756 spin_lock(&inode->i_lock); 1755 spin_lock(&ci->i_ceph_lock);
1757 for (i = 0; i < CEPH_CAP_BITS; i++) 1756 for (i = 0; i < CEPH_CAP_BITS; i++)
1758 if ((ci->i_flushing_caps & (1 << i)) && 1757 if ((ci->i_flushing_caps & (1 << i)) &&
1759 ci->i_cap_flush_tid[i] <= tid) { 1758 ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1761 ret = 0; 1760 ret = 0;
1762 break; 1761 break;
1763 } 1762 }
1764 spin_unlock(&inode->i_lock); 1763 spin_unlock(&ci->i_ceph_lock);
1765 return ret; 1764 return ret;
1766} 1765}
1767 1766
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1868 struct ceph_mds_client *mdsc = 1867 struct ceph_mds_client *mdsc =
1869 ceph_sb_to_client(inode->i_sb)->mdsc; 1868 ceph_sb_to_client(inode->i_sb)->mdsc;
1870 1869
1871 spin_lock(&inode->i_lock); 1870 spin_lock(&ci->i_ceph_lock);
1872 if (__ceph_caps_dirty(ci)) 1871 if (__ceph_caps_dirty(ci))
1873 __cap_delay_requeue_front(mdsc, ci); 1872 __cap_delay_requeue_front(mdsc, ci);
1874 spin_unlock(&inode->i_lock); 1873 spin_unlock(&ci->i_ceph_lock);
1875 } 1874 }
1876 return err; 1875 return err;
1877} 1876}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 struct inode *inode = &ci->vfs_inode; 1893 struct inode *inode = &ci->vfs_inode;
1895 struct ceph_cap *cap; 1894 struct ceph_cap *cap;
1896 1895
1897 spin_lock(&inode->i_lock); 1896 spin_lock(&ci->i_ceph_lock);
1898 cap = ci->i_auth_cap; 1897 cap = ci->i_auth_cap;
1899 if (cap && cap->session == session) { 1898 if (cap && cap->session == session) {
1900 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1899 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1904 pr_err("%p auth cap %p not mds%d ???\n", inode, 1903 pr_err("%p auth cap %p not mds%d ???\n", inode,
1905 cap, session->s_mds); 1904 cap, session->s_mds);
1906 } 1905 }
1907 spin_unlock(&inode->i_lock); 1906 spin_unlock(&ci->i_ceph_lock);
1908 } 1907 }
1909} 1908}
1910 1909
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1921 struct ceph_cap *cap; 1920 struct ceph_cap *cap;
1922 int delayed = 0; 1921 int delayed = 0;
1923 1922
1924 spin_lock(&inode->i_lock); 1923 spin_lock(&ci->i_ceph_lock);
1925 cap = ci->i_auth_cap; 1924 cap = ci->i_auth_cap;
1926 if (cap && cap->session == session) { 1925 if (cap && cap->session == session) {
1927 dout("kick_flushing_caps %p cap %p %s\n", inode, 1926 dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1932 cap->issued | cap->implemented, 1931 cap->issued | cap->implemented,
1933 ci->i_flushing_caps, NULL); 1932 ci->i_flushing_caps, NULL);
1934 if (delayed) { 1933 if (delayed) {
1935 spin_lock(&inode->i_lock); 1934 spin_lock(&ci->i_ceph_lock);
1936 __cap_delay_requeue(mdsc, ci); 1935 __cap_delay_requeue(mdsc, ci);
1937 spin_unlock(&inode->i_lock); 1936 spin_unlock(&ci->i_ceph_lock);
1938 } 1937 }
1939 } else { 1938 } else {
1940 pr_err("%p auth cap %p not mds%d ???\n", inode, 1939 pr_err("%p auth cap %p not mds%d ???\n", inode,
1941 cap, session->s_mds); 1940 cap, session->s_mds);
1942 spin_unlock(&inode->i_lock); 1941 spin_unlock(&ci->i_ceph_lock);
1943 } 1942 }
1944 } 1943 }
1945} 1944}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1952 struct ceph_cap *cap; 1951 struct ceph_cap *cap;
1953 int delayed = 0; 1952 int delayed = 0;
1954 1953
1955 spin_lock(&inode->i_lock); 1954 spin_lock(&ci->i_ceph_lock);
1956 cap = ci->i_auth_cap; 1955 cap = ci->i_auth_cap;
1957 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, 1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1958 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); 1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1964 cap->issued | cap->implemented, 1963 cap->issued | cap->implemented,
1965 ci->i_flushing_caps, NULL); 1964 ci->i_flushing_caps, NULL);
1966 if (delayed) { 1965 if (delayed) {
1967 spin_lock(&inode->i_lock); 1966 spin_lock(&ci->i_ceph_lock);
1968 __cap_delay_requeue(mdsc, ci); 1967 __cap_delay_requeue(mdsc, ci);
1969 spin_unlock(&inode->i_lock); 1968 spin_unlock(&ci->i_ceph_lock);
1970 } 1969 }
1971 } else { 1970 } else {
1972 spin_unlock(&inode->i_lock); 1971 spin_unlock(&ci->i_ceph_lock);
1973 } 1972 }
1974} 1973}
1975 1974
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1978 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
1979 * them to the MDS prematurely. 1978 * them to the MDS prematurely.
1980 * 1979 *
1981 * Protected by i_lock. 1980 * Protected by i_ceph_lock.
1982 */ 1981 */
1983static void __take_cap_refs(struct ceph_inode_info *ci, int got) 1982static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1984{ 1983{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2016 2015
2017 dout("get_cap_refs %p need %s want %s\n", inode, 2016 dout("get_cap_refs %p need %s want %s\n", inode,
2018 ceph_cap_string(need), ceph_cap_string(want)); 2017 ceph_cap_string(need), ceph_cap_string(want));
2019 spin_lock(&inode->i_lock); 2018 spin_lock(&ci->i_ceph_lock);
2020 2019
2021 /* make sure file is actually open */ 2020 /* make sure file is actually open */
2022 file_wanted = __ceph_caps_file_wanted(ci); 2021 file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2077 ceph_cap_string(have), ceph_cap_string(need)); 2076 ceph_cap_string(have), ceph_cap_string(need));
2078 } 2077 }
2079out: 2078out:
2080 spin_unlock(&inode->i_lock); 2079 spin_unlock(&ci->i_ceph_lock);
2081 dout("get_cap_refs %p ret %d got %s\n", inode, 2080 dout("get_cap_refs %p ret %d got %s\n", inode,
2082 ret, ceph_cap_string(*got)); 2081 ret, ceph_cap_string(*got));
2083 return ret; 2082 return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2094 int check = 0; 2093 int check = 0;
2095 2094
2096 /* do we need to explicitly request a larger max_size? */ 2095 /* do we need to explicitly request a larger max_size? */
2097 spin_lock(&inode->i_lock); 2096 spin_lock(&ci->i_ceph_lock);
2098 if ((endoff >= ci->i_max_size || 2097 if ((endoff >= ci->i_max_size ||
2099 endoff > (inode->i_size << 1)) && 2098 endoff > (inode->i_size << 1)) &&
2100 endoff > ci->i_wanted_max_size) { 2099 endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2103 ci->i_wanted_max_size = endoff; 2102 ci->i_wanted_max_size = endoff;
2104 check = 1; 2103 check = 1;
2105 } 2104 }
2106 spin_unlock(&inode->i_lock); 2105 spin_unlock(&ci->i_ceph_lock);
2107 if (check) 2106 if (check)
2108 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2107 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2109} 2108}
@@ -2140,9 +2139,9 @@ retry:
2140 */ 2139 */
2141void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) 2140void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
2142{ 2141{
2143 spin_lock(&ci->vfs_inode.i_lock); 2142 spin_lock(&ci->i_ceph_lock);
2144 __take_cap_refs(ci, caps); 2143 __take_cap_refs(ci, caps);
2145 spin_unlock(&ci->vfs_inode.i_lock); 2144 spin_unlock(&ci->i_ceph_lock);
2146} 2145}
2147 2146
2148/* 2147/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2160 int last = 0, put = 0, flushsnaps = 0, wake = 0; 2159 int last = 0, put = 0, flushsnaps = 0, wake = 0;
2161 struct ceph_cap_snap *capsnap; 2160 struct ceph_cap_snap *capsnap;
2162 2161
2163 spin_lock(&inode->i_lock); 2162 spin_lock(&ci->i_ceph_lock);
2164 if (had & CEPH_CAP_PIN) 2163 if (had & CEPH_CAP_PIN)
2165 --ci->i_pin_ref; 2164 --ci->i_pin_ref;
2166 if (had & CEPH_CAP_FILE_RD) 2165 if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2193 } 2192 }
2194 } 2193 }
2195 } 2194 }
2196 spin_unlock(&inode->i_lock); 2195 spin_unlock(&ci->i_ceph_lock);
2197 2196
2198 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), 2197 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2199 last ? " last" : "", put ? " put" : ""); 2198 last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2225 int found = 0; 2224 int found = 0;
2226 struct ceph_cap_snap *capsnap = NULL; 2225 struct ceph_cap_snap *capsnap = NULL;
2227 2226
2228 spin_lock(&inode->i_lock); 2227 spin_lock(&ci->i_ceph_lock);
2229 ci->i_wrbuffer_ref -= nr; 2228 ci->i_wrbuffer_ref -= nr;
2230 last = !ci->i_wrbuffer_ref; 2229 last = !ci->i_wrbuffer_ref;
2231 2230
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2274 } 2273 }
2275 } 2274 }
2276 2275
2277 spin_unlock(&inode->i_lock); 2276 spin_unlock(&ci->i_ceph_lock);
2278 2277
2279 if (last) { 2278 if (last) {
2280 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2279 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2291 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2290 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2292 * actually be a revocation if it specifies a smaller cap set.) 2291 * actually be a revocation if it specifies a smaller cap set.)
2293 * 2292 *
2294 * caller holds s_mutex and i_lock, we drop both. 2293 * caller holds s_mutex and i_ceph_lock, we drop both.
2295 * 2294 *
2296 * return value: 2295 * return value:
2297 * 0 - ok 2296 * 0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2302 struct ceph_mds_session *session, 2301 struct ceph_mds_session *session,
2303 struct ceph_cap *cap, 2302 struct ceph_cap *cap,
2304 struct ceph_buffer *xattr_buf) 2303 struct ceph_buffer *xattr_buf)
2305 __releases(inode->i_lock) 2304 __releases(ci->i_ceph_lock)
2306{ 2305{
2307 struct ceph_inode_info *ci = ceph_inode(inode); 2306 struct ceph_inode_info *ci = ceph_inode(inode);
2308 int mds = session->s_mds; 2307 int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2453 } 2452 }
2454 BUG_ON(cap->issued & ~cap->implemented); 2453 BUG_ON(cap->issued & ~cap->implemented);
2455 2454
2456 spin_unlock(&inode->i_lock); 2455 spin_unlock(&ci->i_ceph_lock);
2457 if (writeback) 2456 if (writeback)
2458 /* 2457 /*
2459 * queue inode for writeback: we can't actually call 2458 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 struct ceph_mds_caps *m, 2482 struct ceph_mds_caps *m,
2484 struct ceph_mds_session *session, 2483 struct ceph_mds_session *session,
2485 struct ceph_cap *cap) 2484 struct ceph_cap *cap)
2486 __releases(inode->i_lock) 2485 __releases(ci->i_ceph_lock)
2487{ 2486{
2488 struct ceph_inode_info *ci = ceph_inode(inode); 2487 struct ceph_inode_info *ci = ceph_inode(inode);
2489 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 2488 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2539 wake_up_all(&ci->i_cap_wq); 2538 wake_up_all(&ci->i_cap_wq);
2540 2539
2541out: 2540out:
2542 spin_unlock(&inode->i_lock); 2541 spin_unlock(&ci->i_ceph_lock);
2543 if (drop) 2542 if (drop)
2544 iput(inode); 2543 iput(inode);
2545} 2544}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2562 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", 2561 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
2563 inode, ci, session->s_mds, follows); 2562 inode, ci, session->s_mds, follows);
2564 2563
2565 spin_lock(&inode->i_lock); 2564 spin_lock(&ci->i_ceph_lock);
2566 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2565 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2567 if (capsnap->follows == follows) { 2566 if (capsnap->follows == follows) {
2568 if (capsnap->flush_tid != flush_tid) { 2567 if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2585 capsnap, capsnap->follows); 2584 capsnap, capsnap->follows);
2586 } 2585 }
2587 } 2586 }
2588 spin_unlock(&inode->i_lock); 2587 spin_unlock(&ci->i_ceph_lock);
2589 if (drop) 2588 if (drop)
2590 iput(inode); 2589 iput(inode);
2591} 2590}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2598static void handle_cap_trunc(struct inode *inode, 2597static void handle_cap_trunc(struct inode *inode,
2599 struct ceph_mds_caps *trunc, 2598 struct ceph_mds_caps *trunc,
2600 struct ceph_mds_session *session) 2599 struct ceph_mds_session *session)
2601 __releases(inode->i_lock) 2600 __releases(ci->i_ceph_lock)
2602{ 2601{
2603 struct ceph_inode_info *ci = ceph_inode(inode); 2602 struct ceph_inode_info *ci = ceph_inode(inode);
2604 int mds = session->s_mds; 2603 int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
2617 inode, mds, seq, truncate_size, truncate_seq); 2616 inode, mds, seq, truncate_size, truncate_seq);
2618 queue_trunc = ceph_fill_file_size(inode, issued, 2617 queue_trunc = ceph_fill_file_size(inode, issued,
2619 truncate_seq, truncate_size, size); 2618 truncate_seq, truncate_size, size);
2620 spin_unlock(&inode->i_lock); 2619 spin_unlock(&ci->i_ceph_lock);
2621 2620
2622 if (queue_trunc) 2621 if (queue_trunc)
2623 ceph_queue_vmtruncate(inode); 2622 ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2646 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", 2645 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
2647 inode, ci, mds, mseq); 2646 inode, ci, mds, mseq);
2648 2647
2649 spin_lock(&inode->i_lock); 2648 spin_lock(&ci->i_ceph_lock);
2650 2649
2651 /* make sure we haven't seen a higher mseq */ 2650 /* make sure we haven't seen a higher mseq */
2652 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 2651 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2690 } 2689 }
2691 /* else, we already released it */ 2690 /* else, we already released it */
2692 2691
2693 spin_unlock(&inode->i_lock); 2692 spin_unlock(&ci->i_ceph_lock);
2694} 2693}
2695 2694
2696/* 2695/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2745 up_read(&mdsc->snap_rwsem); 2744 up_read(&mdsc->snap_rwsem);
2746 2745
2747 /* make sure we re-request max_size, if necessary */ 2746 /* make sure we re-request max_size, if necessary */
2748 spin_lock(&inode->i_lock); 2747 spin_lock(&ci->i_ceph_lock);
2749 ci->i_requested_max_size = 0; 2748 ci->i_requested_max_size = 0;
2750 spin_unlock(&inode->i_lock); 2749 spin_unlock(&ci->i_ceph_lock);
2751} 2750}
2752 2751
2753/* 2752/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2762 struct ceph_mds_client *mdsc = session->s_mdsc; 2761 struct ceph_mds_client *mdsc = session->s_mdsc;
2763 struct super_block *sb = mdsc->fsc->sb; 2762 struct super_block *sb = mdsc->fsc->sb;
2764 struct inode *inode; 2763 struct inode *inode;
2764 struct ceph_inode_info *ci;
2765 struct ceph_cap *cap; 2765 struct ceph_cap *cap;
2766 struct ceph_mds_caps *h; 2766 struct ceph_mds_caps *h;
2767 int mds = session->s_mds; 2767 int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2815 2815
2816 /* lookup ino */ 2816 /* lookup ino */
2817 inode = ceph_find_inode(sb, vino); 2817 inode = ceph_find_inode(sb, vino);
2818 ci = ceph_inode(inode);
2818 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, 2819 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
2819 vino.snap, inode); 2820 vino.snap, inode);
2820 if (!inode) { 2821 if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2844 } 2845 }
2845 2846
2846 /* the rest require a cap */ 2847 /* the rest require a cap */
2847 spin_lock(&inode->i_lock); 2848 spin_lock(&ci->i_ceph_lock);
2848 cap = __get_cap_for_mds(ceph_inode(inode), mds); 2849 cap = __get_cap_for_mds(ceph_inode(inode), mds);
2849 if (!cap) { 2850 if (!cap) {
2850 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2851 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2851 inode, ceph_ino(inode), ceph_snap(inode), mds); 2852 inode, ceph_ino(inode), ceph_snap(inode), mds);
2852 spin_unlock(&inode->i_lock); 2853 spin_unlock(&ci->i_ceph_lock);
2853 goto flush_cap_releases; 2854 goto flush_cap_releases;
2854 } 2855 }
2855 2856
2856 /* note that each of these drops i_lock for us */ 2857 /* note that each of these drops i_ceph_lock for us */
2857 switch (op) { 2858 switch (op) {
2858 case CEPH_CAP_OP_REVOKE: 2859 case CEPH_CAP_OP_REVOKE:
2859 case CEPH_CAP_OP_GRANT: 2860 case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2869 break; 2870 break;
2870 2871
2871 default: 2872 default:
2872 spin_unlock(&inode->i_lock); 2873 spin_unlock(&ci->i_ceph_lock);
2873 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, 2874 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
2874 ceph_cap_op_name(op)); 2875 ceph_cap_op_name(op));
2875 } 2876 }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
2962 struct inode *inode = &ci->vfs_inode; 2963 struct inode *inode = &ci->vfs_inode;
2963 int last = 0; 2964 int last = 0;
2964 2965
2965 spin_lock(&inode->i_lock); 2966 spin_lock(&ci->i_ceph_lock);
2966 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode, 2967 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
2967 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); 2968 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
2968 BUG_ON(ci->i_nr_by_mode[fmode] == 0); 2969 BUG_ON(ci->i_nr_by_mode[fmode] == 0);
2969 if (--ci->i_nr_by_mode[fmode] == 0) 2970 if (--ci->i_nr_by_mode[fmode] == 0)
2970 last++; 2971 last++;
2971 spin_unlock(&inode->i_lock); 2972 spin_unlock(&ci->i_ceph_lock);
2972 2973
2973 if (last && ci->i_vino.snap == CEPH_NOSNAP) 2974 if (last && ci->i_vino.snap == CEPH_NOSNAP)
2974 ceph_check_caps(ci, 0, NULL); 2975 ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2991 int used, dirty; 2992 int used, dirty;
2992 int ret = 0; 2993 int ret = 0;
2993 2994
2994 spin_lock(&inode->i_lock); 2995 spin_lock(&ci->i_ceph_lock);
2995 used = __ceph_caps_used(ci); 2996 used = __ceph_caps_used(ci);
2996 dirty = __ceph_caps_dirty(ci); 2997 dirty = __ceph_caps_dirty(ci);
2997 2998
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3046 inode, cap, ceph_cap_string(cap->issued)); 3047 inode, cap, ceph_cap_string(cap->issued));
3047 } 3048 }
3048 } 3049 }
3049 spin_unlock(&inode->i_lock); 3050 spin_unlock(&ci->i_ceph_lock);
3050 return ret; 3051 return ret;
3051} 3052}
3052 3053
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
3061 3062
3062 /* 3063 /*
3063 * force an record for the directory caps if we have a dentry lease. 3064 * force an record for the directory caps if we have a dentry lease.
3064 * this is racy (can't take i_lock and d_lock together), but it 3065 * this is racy (can't take i_ceph_lock and d_lock together), but it
3065 * doesn't have to be perfect; the mds will revoke anything we don't 3066 * doesn't have to be perfect; the mds will revoke anything we don't
3066 * release. 3067 * release.
3067 */ 3068 */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948e9dbf..3eeb97661262 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
281 } 281 }
282 282
283 /* can we use the dcache? */ 283 /* can we use the dcache? */
284 spin_lock(&inode->i_lock); 284 spin_lock(&ci->i_ceph_lock);
285 if ((filp->f_pos == 2 || fi->dentry) && 285 if ((filp->f_pos == 2 || fi->dentry) &&
286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
287 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
288 ceph_dir_test_complete(inode) && 288 ceph_dir_test_complete(inode) &&
289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
290 spin_unlock(&inode->i_lock); 290 spin_unlock(&ci->i_ceph_lock);
291 err = __dcache_readdir(filp, dirent, filldir); 291 err = __dcache_readdir(filp, dirent, filldir);
292 if (err != -EAGAIN) 292 if (err != -EAGAIN)
293 return err; 293 return err;
294 } else { 294 } else {
295 spin_unlock(&inode->i_lock); 295 spin_unlock(&ci->i_ceph_lock);
296 } 296 }
297 if (fi->dentry) { 297 if (fi->dentry) {
298 err = note_last_dentry(fi, fi->dentry->d_name.name, 298 err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
428 * were released during the whole readdir, and we should have 428 * were released during the whole readdir, and we should have
429 * the complete dir contents in our cache. 429 * the complete dir contents in our cache.
430 */ 430 */
431 spin_lock(&inode->i_lock); 431 spin_lock(&ci->i_ceph_lock);
432 if (ci->i_release_count == fi->dir_release_count) { 432 if (ci->i_release_count == fi->dir_release_count) {
433 ceph_dir_set_complete(inode); 433 ceph_dir_set_complete(inode);
434 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
435 } 435 }
436 spin_unlock(&inode->i_lock); 436 spin_unlock(&ci->i_ceph_lock);
437 437
438 dout("readdir %p filp %p done.\n", inode, filp); 438 dout("readdir %p filp %p done.\n", inode, filp);
439 return 0; 439 return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
607 struct ceph_inode_info *ci = ceph_inode(dir); 607 struct ceph_inode_info *ci = ceph_inode(dir);
608 struct ceph_dentry_info *di = ceph_dentry(dentry); 608 struct ceph_dentry_info *di = ceph_dentry(dentry);
609 609
610 spin_lock(&dir->i_lock); 610 spin_lock(&ci->i_ceph_lock);
611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
612 if (strncmp(dentry->d_name.name, 612 if (strncmp(dentry->d_name.name,
613 fsc->mount_options->snapdir_name, 613 fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
615 !is_root_ceph_dentry(dir, dentry) && 615 !is_root_ceph_dentry(dir, dentry) &&
616 ceph_dir_test_complete(dir) && 616 ceph_dir_test_complete(dir) &&
617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
618 spin_unlock(&dir->i_lock); 618 spin_unlock(&ci->i_ceph_lock);
619 dout(" dir %p complete, -ENOENT\n", dir); 619 dout(" dir %p complete, -ENOENT\n", dir);
620 d_add(dentry, NULL); 620 d_add(dentry, NULL);
621 di->lease_shared_gen = ci->i_shared_gen; 621 di->lease_shared_gen = ci->i_shared_gen;
622 return NULL; 622 return NULL;
623 } 623 }
624 spin_unlock(&dir->i_lock); 624 spin_unlock(&ci->i_ceph_lock);
625 } 625 }
626 626
627 op = ceph_snap(dir) == CEPH_SNAPDIR ? 627 op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
841 struct ceph_inode_info *ci = ceph_inode(inode); 841 struct ceph_inode_info *ci = ceph_inode(inode);
842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
843 843
844 spin_lock(&inode->i_lock); 844 spin_lock(&ci->i_ceph_lock);
845 if (inode->i_nlink == 1) { 845 if (inode->i_nlink == 1) {
846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
847 ci->i_ceph_flags |= CEPH_I_NODELAY; 847 ci->i_ceph_flags |= CEPH_I_NODELAY;
848 } 848 }
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 return drop; 850 return drop;
851} 851}
852 852
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
1015 struct ceph_dentry_info *di = ceph_dentry(dentry); 1015 struct ceph_dentry_info *di = ceph_dentry(dentry);
1016 int valid = 0; 1016 int valid = 0;
1017 1017
1018 spin_lock(&dir->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 if (ci->i_shared_gen == di->lease_shared_gen) 1019 if (ci->i_shared_gen == di->lease_shared_gen)
1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1021 spin_unlock(&dir->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
1023 dir, (unsigned)ci->i_shared_gen, dentry, 1023 dir, (unsigned)ci->i_shared_gen, dentry,
1024 (unsigned)di->lease_shared_gen, valid); 1024 (unsigned)di->lease_shared_gen, valid);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d31eeb7..af187330cc89 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
147 147
148 /* trivially open snapdir */ 148 /* trivially open snapdir */
149 if (ceph_snap(inode) == CEPH_SNAPDIR) { 149 if (ceph_snap(inode) == CEPH_SNAPDIR) {
150 spin_lock(&inode->i_lock); 150 spin_lock(&ci->i_ceph_lock);
151 __ceph_get_fmode(ci, fmode); 151 __ceph_get_fmode(ci, fmode);
152 spin_unlock(&inode->i_lock); 152 spin_unlock(&ci->i_ceph_lock);
153 return ceph_init_file(inode, file, fmode); 153 return ceph_init_file(inode, file, fmode);
154 } 154 }
155 155
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
158 * write) or any MDS (for read). Update wanted set 158 * write) or any MDS (for read). Update wanted set
159 * asynchronously. 159 * asynchronously.
160 */ 160 */
161 spin_lock(&inode->i_lock); 161 spin_lock(&ci->i_ceph_lock);
162 if (__ceph_is_any_real_caps(ci) && 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
164 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
168 inode, fmode, ceph_cap_string(wanted), 168 inode, fmode, ceph_cap_string(wanted),
169 ceph_cap_string(issued)); 169 ceph_cap_string(issued));
170 __ceph_get_fmode(ci, fmode); 170 __ceph_get_fmode(ci, fmode);
171 spin_unlock(&inode->i_lock); 171 spin_unlock(&ci->i_ceph_lock);
172 172
173 /* adjust wanted? */ 173 /* adjust wanted? */
174 if ((issued & wanted) != wanted && 174 if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
180 } else if (ceph_snap(inode) != CEPH_NOSNAP && 180 } else if (ceph_snap(inode) != CEPH_NOSNAP &&
181 (ci->i_snap_caps & wanted) == wanted) { 181 (ci->i_snap_caps & wanted) == wanted) {
182 __ceph_get_fmode(ci, fmode); 182 __ceph_get_fmode(ci, fmode);
183 spin_unlock(&inode->i_lock); 183 spin_unlock(&ci->i_ceph_lock);
184 return ceph_init_file(inode, file, fmode); 184 return ceph_init_file(inode, file, fmode);
185 } 185 }
186 spin_unlock(&inode->i_lock); 186 spin_unlock(&ci->i_ceph_lock);
187 187
188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
189 req = prepare_open_request(inode->i_sb, flags, 0); 189 req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
743 */ 743 */
744 int dirty; 744 int dirty;
745 745
746 spin_lock(&inode->i_lock); 746 spin_lock(&ci->i_ceph_lock);
747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
748 spin_unlock(&inode->i_lock); 748 spin_unlock(&ci->i_ceph_lock);
749 ceph_put_cap_refs(ci, got); 749 ceph_put_cap_refs(ci, got);
750 750
751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
764 764
765 if (ret >= 0) { 765 if (ret >= 0) {
766 int dirty; 766 int dirty;
767 spin_lock(&inode->i_lock); 767 spin_lock(&ci->i_ceph_lock);
768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
769 spin_unlock(&inode->i_lock); 769 spin_unlock(&ci->i_ceph_lock);
770 if (dirty) 770 if (dirty)
771 __mark_inode_dirty(inode, dirty); 771 __mark_inode_dirty(inode, dirty);
772 } 772 }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f36502f17..87fb132fb330 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
297 297
298 dout("alloc_inode %p\n", &ci->vfs_inode); 298 dout("alloc_inode %p\n", &ci->vfs_inode);
299 299
300 spin_lock_init(&ci->i_ceph_lock);
301
300 ci->i_version = 0; 302 ci->i_version = 0;
301 ci->i_time_warp_seq = 0; 303 ci->i_time_warp_seq = 0;
302 ci->i_ceph_flags = 0; 304 ci->i_ceph_flags = 0;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
583 iinfo->xattr_len); 585 iinfo->xattr_len);
584 } 586 }
585 587
586 spin_lock(&inode->i_lock); 588 spin_lock(&ci->i_ceph_lock);
587 589
588 /* 590 /*
589 * provided version will be odd if inode value is projected, 591 * provided version will be odd if inode value is projected,
@@ -680,7 +682,7 @@ static int fill_inode(struct inode *inode,
680 char *sym; 682 char *sym;
681 683
682 BUG_ON(symlen != inode->i_size); 684 BUG_ON(symlen != inode->i_size);
683 spin_unlock(&inode->i_lock); 685 spin_unlock(&ci->i_ceph_lock);
684 686
685 err = -ENOMEM; 687 err = -ENOMEM;
686 sym = kmalloc(symlen+1, GFP_NOFS); 688 sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +691,7 @@ static int fill_inode(struct inode *inode,
689 memcpy(sym, iinfo->symlink, symlen); 691 memcpy(sym, iinfo->symlink, symlen);
690 sym[symlen] = 0; 692 sym[symlen] = 0;
691 693
692 spin_lock(&inode->i_lock); 694 spin_lock(&ci->i_ceph_lock);
693 if (!ci->i_symlink) 695 if (!ci->i_symlink)
694 ci->i_symlink = sym; 696 ci->i_symlink = sym;
695 else 697 else
@@ -715,7 +717,7 @@ static int fill_inode(struct inode *inode,
715 } 717 }
716 718
717no_change: 719no_change:
718 spin_unlock(&inode->i_lock); 720 spin_unlock(&ci->i_ceph_lock);
719 721
720 /* queue truncate if we saw i_size decrease */ 722 /* queue truncate if we saw i_size decrease */
721 if (queue_trunc) 723 if (queue_trunc)
@@ -750,13 +752,13 @@ no_change:
750 info->cap.flags, 752 info->cap.flags,
751 caps_reservation); 753 caps_reservation);
752 } else { 754 } else {
753 spin_lock(&inode->i_lock); 755 spin_lock(&ci->i_ceph_lock);
754 dout(" %p got snap_caps %s\n", inode, 756 dout(" %p got snap_caps %s\n", inode,
755 ceph_cap_string(le32_to_cpu(info->cap.caps))); 757 ceph_cap_string(le32_to_cpu(info->cap.caps)));
756 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 758 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
757 if (cap_fmode >= 0) 759 if (cap_fmode >= 0)
758 __ceph_get_fmode(ci, cap_fmode); 760 __ceph_get_fmode(ci, cap_fmode);
759 spin_unlock(&inode->i_lock); 761 spin_unlock(&ci->i_ceph_lock);
760 } 762 }
761 } else if (cap_fmode >= 0) { 763 } else if (cap_fmode >= 0) {
762 pr_warning("mds issued no caps on %llx.%llx\n", 764 pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +851,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
849{ 851{
850 struct dentry *dir = dn->d_parent; 852 struct dentry *dir = dn->d_parent;
851 struct inode *inode = dir->d_inode; 853 struct inode *inode = dir->d_inode;
854 struct ceph_inode_info *ci = ceph_inode(inode);
852 struct ceph_dentry_info *di; 855 struct ceph_dentry_info *di;
853 856
854 BUG_ON(!inode); 857 BUG_ON(!inode);
855 858
856 di = ceph_dentry(dn); 859 di = ceph_dentry(dn);
857 860
858 spin_lock(&inode->i_lock); 861 spin_lock(&ci->i_ceph_lock);
859 if (!ceph_dir_test_complete(inode)) { 862 if (!ceph_dir_test_complete(inode)) {
860 spin_unlock(&inode->i_lock); 863 spin_unlock(&ci->i_ceph_lock);
861 return; 864 return;
862 } 865 }
863 di->offset = ceph_inode(inode)->i_max_offset++; 866 di->offset = ceph_inode(inode)->i_max_offset++;
864 spin_unlock(&inode->i_lock); 867 spin_unlock(&ci->i_ceph_lock);
865 868
866 spin_lock(&dir->d_lock); 869 spin_lock(&dir->d_lock);
867 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 870 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1308 struct ceph_inode_info *ci = ceph_inode(inode); 1311 struct ceph_inode_info *ci = ceph_inode(inode);
1309 int ret = 0; 1312 int ret = 0;
1310 1313
1311 spin_lock(&inode->i_lock); 1314 spin_lock(&ci->i_ceph_lock);
1312 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); 1315 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
1313 inode->i_size = size; 1316 inode->i_size = size;
1314 inode->i_blocks = (size + (1 << 9) - 1) >> 9; 1317 inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1318 (ci->i_reported_size << 1) < ci->i_max_size) 1321 (ci->i_reported_size << 1) < ci->i_max_size)
1319 ret = 1; 1322 ret = 1;
1320 1323
1321 spin_unlock(&inode->i_lock); 1324 spin_unlock(&ci->i_ceph_lock);
1322 return ret; 1325 return ret;
1323} 1326}
1324 1327
@@ -1376,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
1376 u32 orig_gen; 1379 u32 orig_gen;
1377 int check = 0; 1380 int check = 0;
1378 1381
1379 spin_lock(&inode->i_lock); 1382 spin_lock(&ci->i_ceph_lock);
1380 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1383 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1381 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1384 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1382 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1385 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1383 /* nevermind! */ 1386 /* nevermind! */
1384 spin_unlock(&inode->i_lock); 1387 spin_unlock(&ci->i_ceph_lock);
1385 goto out; 1388 goto out;
1386 } 1389 }
1387 orig_gen = ci->i_rdcache_gen; 1390 orig_gen = ci->i_rdcache_gen;
1388 spin_unlock(&inode->i_lock); 1391 spin_unlock(&ci->i_ceph_lock);
1389 1392
1390 truncate_inode_pages(&inode->i_data, 0); 1393 truncate_inode_pages(&inode->i_data, 0);
1391 1394
1392 spin_lock(&inode->i_lock); 1395 spin_lock(&ci->i_ceph_lock);
1393 if (orig_gen == ci->i_rdcache_gen && 1396 if (orig_gen == ci->i_rdcache_gen &&
1394 orig_gen == ci->i_rdcache_revoking) { 1397 orig_gen == ci->i_rdcache_revoking) {
1395 dout("invalidate_pages %p gen %d successful\n", inode, 1398 dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1401 inode, orig_gen, ci->i_rdcache_gen, 1404 inode, orig_gen, ci->i_rdcache_gen,
1402 ci->i_rdcache_revoking); 1405 ci->i_rdcache_revoking);
1403 } 1406 }
1404 spin_unlock(&inode->i_lock); 1407 spin_unlock(&ci->i_ceph_lock);
1405 1408
1406 if (check) 1409 if (check)
1407 ceph_check_caps(ci, 0, NULL); 1410 ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
1460 int wrbuffer_refs, wake = 0; 1463 int wrbuffer_refs, wake = 0;
1461 1464
1462retry: 1465retry:
1463 spin_lock(&inode->i_lock); 1466 spin_lock(&ci->i_ceph_lock);
1464 if (ci->i_truncate_pending == 0) { 1467 if (ci->i_truncate_pending == 0) {
1465 dout("__do_pending_vmtruncate %p none pending\n", inode); 1468 dout("__do_pending_vmtruncate %p none pending\n", inode);
1466 spin_unlock(&inode->i_lock); 1469 spin_unlock(&ci->i_ceph_lock);
1467 return; 1470 return;
1468 } 1471 }
1469 1472
@@ -1474,7 +1477,7 @@ retry:
1474 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 1477 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
1475 dout("__do_pending_vmtruncate %p flushing snaps first\n", 1478 dout("__do_pending_vmtruncate %p flushing snaps first\n",
1476 inode); 1479 inode);
1477 spin_unlock(&inode->i_lock); 1480 spin_unlock(&ci->i_ceph_lock);
1478 filemap_write_and_wait_range(&inode->i_data, 0, 1481 filemap_write_and_wait_range(&inode->i_data, 0,
1479 inode->i_sb->s_maxbytes); 1482 inode->i_sb->s_maxbytes);
1480 goto retry; 1483 goto retry;
@@ -1484,15 +1487,15 @@ retry:
1484 wrbuffer_refs = ci->i_wrbuffer_ref; 1487 wrbuffer_refs = ci->i_wrbuffer_ref;
1485 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode, 1488 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
1486 ci->i_truncate_pending, to); 1489 ci->i_truncate_pending, to);
1487 spin_unlock(&inode->i_lock); 1490 spin_unlock(&ci->i_ceph_lock);
1488 1491
1489 truncate_inode_pages(inode->i_mapping, to); 1492 truncate_inode_pages(inode->i_mapping, to);
1490 1493
1491 spin_lock(&inode->i_lock); 1494 spin_lock(&ci->i_ceph_lock);
1492 ci->i_truncate_pending--; 1495 ci->i_truncate_pending--;
1493 if (ci->i_truncate_pending == 0) 1496 if (ci->i_truncate_pending == 0)
1494 wake = 1; 1497 wake = 1;
1495 spin_unlock(&inode->i_lock); 1498 spin_unlock(&ci->i_ceph_lock);
1496 1499
1497 if (wrbuffer_refs == 0) 1500 if (wrbuffer_refs == 0)
1498 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1501 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1547 if (IS_ERR(req)) 1550 if (IS_ERR(req))
1548 return PTR_ERR(req); 1551 return PTR_ERR(req);
1549 1552
1550 spin_lock(&inode->i_lock); 1553 spin_lock(&ci->i_ceph_lock);
1551 issued = __ceph_caps_issued(ci, NULL); 1554 issued = __ceph_caps_issued(ci, NULL);
1552 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 1555 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
1553 1556
@@ -1695,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1695 } 1698 }
1696 1699
1697 release &= issued; 1700 release &= issued;
1698 spin_unlock(&inode->i_lock); 1701 spin_unlock(&ci->i_ceph_lock);
1699 1702
1700 if (inode_dirty_flags) 1703 if (inode_dirty_flags)
1701 __mark_inode_dirty(inode, inode_dirty_flags); 1704 __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1717 __ceph_do_pending_vmtruncate(inode); 1720 __ceph_do_pending_vmtruncate(inode);
1718 return err; 1721 return err;
1719out: 1722out:
1720 spin_unlock(&inode->i_lock); 1723 spin_unlock(&ci->i_ceph_lock);
1721 ceph_mdsc_put_request(req); 1724 ceph_mdsc_put_request(req);
1722 return err; 1725 return err;
1723} 1726}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29cbba6..790914a598dd 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
241 struct ceph_inode_info *ci = ceph_inode(inode); 241 struct ceph_inode_info *ci = ceph_inode(inode);
242 242
243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
244 spin_lock(&inode->i_lock); 244 spin_lock(&ci->i_ceph_lock);
245 ci->i_nr_by_mode[fi->fmode]--; 245 ci->i_nr_by_mode[fi->fmode]--;
246 fi->fmode |= CEPH_FILE_MODE_LAZY; 246 fi->fmode |= CEPH_FILE_MODE_LAZY;
247 ci->i_nr_by_mode[fi->fmode]++; 247 ci->i_nr_by_mode[fi->fmode]++;
248 spin_unlock(&inode->i_lock); 248 spin_unlock(&ci->i_ceph_lock);
249 dout("ioctl_layzio: file %p marked lazy\n", file); 249 dout("ioctl_layzio: file %p marked lazy\n", file);
250 250
251 ceph_check_caps(ci, 0, NULL); 251 ceph_check_caps(ci, 0, NULL);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab701154f..34bc35084ae3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
732 } 732 }
733 } 733 }
734 734
735 spin_lock(&inode->i_lock); 735 spin_lock(&ci->i_ceph_lock);
736 cap = NULL; 736 cap = NULL;
737 if (mode == USE_AUTH_MDS) 737 if (mode == USE_AUTH_MDS)
738 cap = ci->i_auth_cap; 738 cap = ci->i_auth_cap;
739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
741 if (!cap) { 741 if (!cap) {
742 spin_unlock(&inode->i_lock); 742 spin_unlock(&ci->i_ceph_lock);
743 goto random; 743 goto random;
744 } 744 }
745 mds = cap->session->s_mds; 745 mds = cap->session->s_mds;
746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n", 746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
747 inode, ceph_vinop(inode), mds, 747 inode, ceph_vinop(inode), mds,
748 cap == ci->i_auth_cap ? "auth " : "", cap); 748 cap == ci->i_auth_cap ? "auth " : "", cap);
749 spin_unlock(&inode->i_lock); 749 spin_unlock(&ci->i_ceph_lock);
750 return mds; 750 return mds;
751 751
752random: 752random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
951 951
952 dout("removing cap %p, ci is %p, inode is %p\n", 952 dout("removing cap %p, ci is %p, inode is %p\n",
953 cap, ci, &ci->vfs_inode); 953 cap, ci, &ci->vfs_inode);
954 spin_lock(&inode->i_lock); 954 spin_lock(&ci->i_ceph_lock);
955 __ceph_remove_cap(cap); 955 __ceph_remove_cap(cap);
956 if (!__ceph_is_any_real_caps(ci)) { 956 if (!__ceph_is_any_real_caps(ci)) {
957 struct ceph_mds_client *mdsc = 957 struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
984 } 984 }
985 spin_unlock(&mdsc->cap_dirty_lock); 985 spin_unlock(&mdsc->cap_dirty_lock);
986 } 986 }
987 spin_unlock(&inode->i_lock); 987 spin_unlock(&ci->i_ceph_lock);
988 while (drop--) 988 while (drop--)
989 iput(inode); 989 iput(inode);
990 return 0; 990 return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
1015 1015
1016 wake_up_all(&ci->i_cap_wq); 1016 wake_up_all(&ci->i_cap_wq);
1017 if (arg) { 1017 if (arg) {
1018 spin_lock(&inode->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 ci->i_wanted_max_size = 0; 1019 ci->i_wanted_max_size = 0;
1020 ci->i_requested_max_size = 0; 1020 ci->i_requested_max_size = 0;
1021 spin_unlock(&inode->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 } 1022 }
1023 return 0; 1023 return 0;
1024} 1024}
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1151 if (session->s_trim_caps <= 0) 1151 if (session->s_trim_caps <= 0)
1152 return -1; 1152 return -1;
1153 1153
1154 spin_lock(&inode->i_lock); 1154 spin_lock(&ci->i_ceph_lock);
1155 mine = cap->issued | cap->implemented; 1155 mine = cap->issued | cap->implemented;
1156 used = __ceph_caps_used(ci); 1156 used = __ceph_caps_used(ci);
1157 oissued = __ceph_caps_issued_other(ci, cap); 1157 oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1170 __ceph_remove_cap(cap); 1170 __ceph_remove_cap(cap);
1171 } else { 1171 } else {
1172 /* try to drop referring dentries */ 1172 /* try to drop referring dentries */
1173 spin_unlock(&inode->i_lock); 1173 spin_unlock(&ci->i_ceph_lock);
1174 d_prune_aliases(inode); 1174 d_prune_aliases(inode);
1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1176 inode, cap, atomic_read(&inode->i_count)); 1176 inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1178 } 1178 }
1179 1179
1180out: 1180out:
1181 spin_unlock(&inode->i_lock); 1181 spin_unlock(&ci->i_ceph_lock);
1182 return 0; 1182 return 0;
1183} 1183}
1184 1184
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1296 i_flushing_item); 1296 i_flushing_item);
1297 struct inode *inode = &ci->vfs_inode; 1297 struct inode *inode = &ci->vfs_inode;
1298 1298
1299 spin_lock(&inode->i_lock); 1299 spin_lock(&ci->i_ceph_lock);
1300 if (ci->i_cap_flush_seq <= want_flush_seq) { 1300 if (ci->i_cap_flush_seq <= want_flush_seq) {
1301 dout("check_cap_flush still flushing %p " 1301 dout("check_cap_flush still flushing %p "
1302 "seq %lld <= %lld to mds%d\n", inode, 1302 "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1304 session->s_mds); 1304 session->s_mds);
1305 ret = 0; 1305 ret = 0;
1306 } 1306 }
1307 spin_unlock(&inode->i_lock); 1307 spin_unlock(&ci->i_ceph_lock);
1308 } 1308 }
1309 mutex_unlock(&session->s_mutex); 1309 mutex_unlock(&session->s_mutex);
1310 ceph_put_mds_session(session); 1310 ceph_put_mds_session(session);
@@ -2011,10 +2011,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2011 struct ceph_inode_info *ci = ceph_inode(inode); 2011 struct ceph_inode_info *ci = ceph_inode(inode);
2012 2012
2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); 2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
2014 spin_lock(&inode->i_lock); 2014 spin_lock(&ci->i_ceph_lock);
2015 ceph_dir_clear_complete(inode); 2015 ceph_dir_clear_complete(inode);
2016 ci->i_release_count++; 2016 ci->i_release_count++;
2017 spin_unlock(&inode->i_lock); 2017 spin_unlock(&ci->i_ceph_lock);
2018 2018
2019 if (req->r_dentry) 2019 if (req->r_dentry)
2020 ceph_invalidate_dentry_lease(req->r_dentry); 2020 ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2422,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2422 if (err) 2422 if (err)
2423 goto out_free; 2423 goto out_free;
2424 2424
2425 spin_lock(&inode->i_lock); 2425 spin_lock(&ci->i_ceph_lock);
2426 cap->seq = 0; /* reset cap seq */ 2426 cap->seq = 0; /* reset cap seq */
2427 cap->issue_seq = 0; /* and issue_seq */ 2427 cap->issue_seq = 0; /* and issue_seq */
2428 2428
@@ -2445,7 +2445,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2445 rec.v1.pathbase = cpu_to_le64(pathbase); 2445 rec.v1.pathbase = cpu_to_le64(pathbase);
2446 reclen = sizeof(rec.v1); 2446 reclen = sizeof(rec.v1);
2447 } 2447 }
2448 spin_unlock(&inode->i_lock); 2448 spin_unlock(&ci->i_ceph_lock);
2449 2449
2450 if (recon_state->flock) { 2450 if (recon_state->flock) {
2451 int num_fcntl_locks, num_flock_locks; 2451 int num_fcntl_locks, num_flock_locks;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb239921dbd..a50ca0e39475 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
20 * 20 *
21 * mdsc->snap_rwsem 21 * mdsc->snap_rwsem
22 * 22 *
23 * inode->i_lock 23 * ci->i_ceph_lock
24 * mdsc->snap_flush_lock 24 * mdsc->snap_flush_lock
25 * mdsc->cap_delay_lock 25 * mdsc->cap_delay_lock
26 * 26 *
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e26437191333..a559c80f127a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
446 return; 446 return;
447 } 447 }
448 448
449 spin_lock(&inode->i_lock); 449 spin_lock(&ci->i_ceph_lock);
450 used = __ceph_caps_used(ci); 450 used = __ceph_caps_used(ci);
451 dirty = __ceph_caps_dirty(ci); 451 dirty = __ceph_caps_dirty(ci);
452 452
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
528 kfree(capsnap); 528 kfree(capsnap);
529 } 529 }
530 530
531 spin_unlock(&inode->i_lock); 531 spin_unlock(&ci->i_ceph_lock);
532} 532}
533 533
534/* 534/*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
537 * 537 *
538 * If capsnap can now be flushed, add to snap_flush list, and return 1. 538 * If capsnap can now be flushed, add to snap_flush list, and return 1.
539 * 539 *
540 * Caller must hold i_lock. 540 * Caller must hold i_ceph_lock.
541 */ 541 */
542int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 542int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
543 struct ceph_cap_snap *capsnap) 543 struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
739 inode = &ci->vfs_inode; 739 inode = &ci->vfs_inode;
740 ihold(inode); 740 ihold(inode);
741 spin_unlock(&mdsc->snap_flush_lock); 741 spin_unlock(&mdsc->snap_flush_lock);
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743 __ceph_flush_snaps(ci, &session, 0); 743 __ceph_flush_snaps(ci, &session, 0);
744 spin_unlock(&inode->i_lock); 744 spin_unlock(&ci->i_ceph_lock);
745 iput(inode); 745 iput(inode);
746 spin_lock(&mdsc->snap_flush_lock); 746 spin_lock(&mdsc->snap_flush_lock);
747 } 747 }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
847 continue; 847 continue;
848 ci = ceph_inode(inode); 848 ci = ceph_inode(inode);
849 849
850 spin_lock(&inode->i_lock); 850 spin_lock(&ci->i_ceph_lock);
851 if (!ci->i_snap_realm) 851 if (!ci->i_snap_realm)
852 goto skip_inode; 852 goto skip_inode;
853 /* 853 /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
876 oldrealm = ci->i_snap_realm; 876 oldrealm = ci->i_snap_realm;
877 ci->i_snap_realm = realm; 877 ci->i_snap_realm = realm;
878 spin_unlock(&realm->inodes_with_caps_lock); 878 spin_unlock(&realm->inodes_with_caps_lock);
879 spin_unlock(&inode->i_lock); 879 spin_unlock(&ci->i_ceph_lock);
880 880
881 ceph_get_snap_realm(mdsc, realm); 881 ceph_get_snap_realm(mdsc, realm);
882 ceph_put_snap_realm(mdsc, oldrealm); 882 ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
885 continue; 885 continue;
886 886
887skip_inode: 887skip_inode:
888 spin_unlock(&inode->i_lock); 888 spin_unlock(&ci->i_ceph_lock);
889 iput(inode); 889 iput(inode);
890 } 890 }
891 891
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189e08a9..edcbf3774a56 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
220 * The locking for D_COMPLETE is a bit odd: 220 * The locking for D_COMPLETE is a bit odd:
221 * - we can clear it at almost any time (see ceph_d_prune) 221 * - we can clear it at almost any time (see ceph_d_prune)
222 * - it is only meaningful if: 222 * - it is only meaningful if:
223 * - we hold dir inode i_lock 223 * - we hold dir inode i_ceph_lock
224 * - we hold dir FILE_SHARED caps 224 * - we hold dir FILE_SHARED caps
225 * - the dentry D_COMPLETE is set 225 * - the dentry D_COMPLETE is set
226 */ 226 */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
250struct ceph_inode_info { 250struct ceph_inode_info {
251 struct ceph_vino i_vino; /* ceph ino + snap */ 251 struct ceph_vino i_vino; /* ceph ino + snap */
252 252
253 spinlock_t i_ceph_lock;
254
253 u64 i_version; 255 u64 i_version;
254 u32 i_time_warp_seq; 256 u32 i_time_warp_seq;
255 257
@@ -271,7 +273,7 @@ struct ceph_inode_info {
271 273
272 struct ceph_inode_xattrs_info i_xattrs; 274 struct ceph_inode_xattrs_info i_xattrs;
273 275
274 /* capabilities. protected _both_ by i_lock and cap->session's 276 /* capabilities. protected _both_ by i_ceph_lock and cap->session's
275 * s_mutex. */ 277 * s_mutex. */
276 struct rb_root i_caps; /* cap list */ 278 struct rb_root i_caps; /* cap list */
277 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ 279 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
437{ 439{
438 struct ceph_inode_info *ci = ceph_inode(inode); 440 struct ceph_inode_info *ci = ceph_inode(inode);
439 441
440 spin_lock(&inode->i_lock); 442 spin_lock(&ci->i_ceph_lock);
441 ci->i_ceph_flags &= ~mask; 443 ci->i_ceph_flags &= ~mask;
442 spin_unlock(&inode->i_lock); 444 spin_unlock(&ci->i_ceph_lock);
443} 445}
444 446
445static inline void ceph_i_set(struct inode *inode, unsigned mask) 447static inline void ceph_i_set(struct inode *inode, unsigned mask)
446{ 448{
447 struct ceph_inode_info *ci = ceph_inode(inode); 449 struct ceph_inode_info *ci = ceph_inode(inode);
448 450
449 spin_lock(&inode->i_lock); 451 spin_lock(&ci->i_ceph_lock);
450 ci->i_ceph_flags |= mask; 452 ci->i_ceph_flags |= mask;
451 spin_unlock(&inode->i_lock); 453 spin_unlock(&ci->i_ceph_lock);
452} 454}
453 455
454static inline bool ceph_i_test(struct inode *inode, unsigned mask) 456static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
456 struct ceph_inode_info *ci = ceph_inode(inode); 458 struct ceph_inode_info *ci = ceph_inode(inode);
457 bool r; 459 bool r;
458 460
459 spin_lock(&inode->i_lock); 461 spin_lock(&ci->i_ceph_lock);
460 r = (ci->i_ceph_flags & mask) == mask; 462 r = (ci->i_ceph_flags & mask) == mask;
461 spin_unlock(&inode->i_lock); 463 spin_unlock(&ci->i_ceph_lock);
462 return r; 464 return r;
463} 465}
464 466
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
508static inline int ceph_caps_issued(struct ceph_inode_info *ci) 510static inline int ceph_caps_issued(struct ceph_inode_info *ci)
509{ 511{
510 int issued; 512 int issued;
511 spin_lock(&ci->vfs_inode.i_lock); 513 spin_lock(&ci->i_ceph_lock);
512 issued = __ceph_caps_issued(ci, NULL); 514 issued = __ceph_caps_issued(ci, NULL);
513 spin_unlock(&ci->vfs_inode.i_lock); 515 spin_unlock(&ci->i_ceph_lock);
514 return issued; 516 return issued;
515} 517}
516 518
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
518 int touch) 520 int touch)
519{ 521{
520 int r; 522 int r;
521 spin_lock(&ci->vfs_inode.i_lock); 523 spin_lock(&ci->i_ceph_lock);
522 r = __ceph_caps_issued_mask(ci, mask, touch); 524 r = __ceph_caps_issued_mask(ci, mask, touch);
523 spin_unlock(&ci->vfs_inode.i_lock); 525 spin_unlock(&ci->i_ceph_lock);
524 return r; 526 return r;
525} 527}
526 528
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
743extern void __ceph_remove_cap(struct ceph_cap *cap); 745extern void __ceph_remove_cap(struct ceph_cap *cap);
744static inline void ceph_remove_cap(struct ceph_cap *cap) 746static inline void ceph_remove_cap(struct ceph_cap *cap)
745{ 747{
746 struct inode *inode = &cap->ci->vfs_inode; 748 spin_lock(&cap->ci->i_ceph_lock);
747 spin_lock(&inode->i_lock);
748 __ceph_remove_cap(cap); 749 __ceph_remove_cap(cap);
749 spin_unlock(&inode->i_lock); 750 spin_unlock(&cap->ci->i_ceph_lock);
750} 751}
751extern void ceph_put_cap(struct ceph_mds_client *mdsc, 752extern void ceph_put_cap(struct ceph_mds_client *mdsc,
752 struct ceph_cap *cap); 753 struct ceph_cap *cap);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 96c6739a0280..a5e36e4488a7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
343} 343}
344 344
345static int __build_xattrs(struct inode *inode) 345static int __build_xattrs(struct inode *inode)
346 __releases(inode->i_lock) 346 __releases(ci->i_ceph_lock)
347 __acquires(inode->i_lock) 347 __acquires(ci->i_ceph_lock)
348{ 348{
349 u32 namelen; 349 u32 namelen;
350 u32 numattr = 0; 350 u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
372 end = p + ci->i_xattrs.blob->vec.iov_len; 372 end = p + ci->i_xattrs.blob->vec.iov_len;
373 ceph_decode_32_safe(&p, end, numattr, bad); 373 ceph_decode_32_safe(&p, end, numattr, bad);
374 xattr_version = ci->i_xattrs.version; 374 xattr_version = ci->i_xattrs.version;
375 spin_unlock(&inode->i_lock); 375 spin_unlock(&ci->i_ceph_lock);
376 376
377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), 377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
378 GFP_NOFS); 378 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
387 goto bad_lock; 387 goto bad_lock;
388 } 388 }
389 389
390 spin_lock(&inode->i_lock); 390 spin_lock(&ci->i_ceph_lock);
391 if (ci->i_xattrs.version != xattr_version) { 391 if (ci->i_xattrs.version != xattr_version) {
392 /* lost a race, retry */ 392 /* lost a race, retry */
393 for (i = 0; i < numattr; i++) 393 for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
418 418
419 return err; 419 return err;
420bad_lock: 420bad_lock:
421 spin_lock(&inode->i_lock); 421 spin_lock(&ci->i_ceph_lock);
422bad: 422bad:
423 if (xattrs) { 423 if (xattrs) {
424 for (i = 0; i < numattr; i++) 424 for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
512 if (vxattrs) 512 if (vxattrs)
513 vxattr = ceph_match_vxattr(vxattrs, name); 513 vxattr = ceph_match_vxattr(vxattrs, name);
514 514
515 spin_lock(&inode->i_lock); 515 spin_lock(&ci->i_ceph_lock);
516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
517 ci->i_xattrs.version, ci->i_xattrs.index_version); 517 ci->i_xattrs.version, ci->i_xattrs.index_version);
518 518
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
521 goto get_xattr; 521 goto get_xattr;
522 } else { 522 } else {
523 spin_unlock(&inode->i_lock); 523 spin_unlock(&ci->i_ceph_lock);
524 /* get xattrs from mds (if we don't already have them) */ 524 /* get xattrs from mds (if we don't already have them) */
525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
526 if (err) 526 if (err)
527 return err; 527 return err;
528 } 528 }
529 529
530 spin_lock(&inode->i_lock); 530 spin_lock(&ci->i_ceph_lock);
531 531
532 if (vxattr && vxattr->readonly) { 532 if (vxattr && vxattr->readonly) {
533 err = vxattr->getxattr_cb(ci, value, size); 533 err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
558 memcpy(value, xattr->val, xattr->val_len); 558 memcpy(value, xattr->val, xattr->val_len);
559 559
560out: 560out:
561 spin_unlock(&inode->i_lock); 561 spin_unlock(&ci->i_ceph_lock);
562 return err; 562 return err;
563} 563}
564 564
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
573 u32 len; 573 u32 len;
574 int i; 574 int i;
575 575
576 spin_lock(&inode->i_lock); 576 spin_lock(&ci->i_ceph_lock);
577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
578 ci->i_xattrs.version, ci->i_xattrs.index_version); 578 ci->i_xattrs.version, ci->i_xattrs.index_version);
579 579
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
582 goto list_xattr; 582 goto list_xattr;
583 } else { 583 } else {
584 spin_unlock(&inode->i_lock); 584 spin_unlock(&ci->i_ceph_lock);
585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
586 if (err) 586 if (err)
587 return err; 587 return err;
588 } 588 }
589 589
590 spin_lock(&inode->i_lock); 590 spin_lock(&ci->i_ceph_lock);
591 591
592 err = __build_xattrs(inode); 592 err = __build_xattrs(inode);
593 if (err < 0) 593 if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
619 } 619 }
620 620
621out: 621out:
622 spin_unlock(&inode->i_lock); 622 spin_unlock(&ci->i_ceph_lock);
623 return err; 623 return err;
624} 624}
625 625
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
739 if (!xattr) 739 if (!xattr)
740 goto out; 740 goto out;
741 741
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743retry: 743retry:
744 issued = __ceph_caps_issued(ci, NULL); 744 issued = __ceph_caps_issued(ci, NULL);
745 if (!(issued & CEPH_CAP_XATTR_EXCL)) 745 if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
753 struct ceph_buffer *blob = NULL; 753 struct ceph_buffer *blob = NULL;
754 754
755 spin_unlock(&inode->i_lock); 755 spin_unlock(&ci->i_ceph_lock);
756 dout(" preaallocating new blob size=%d\n", required_blob_size); 756 dout(" preaallocating new blob size=%d\n", required_blob_size);
757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
758 if (!blob) 758 if (!blob)
759 goto out; 759 goto out;
760 spin_lock(&inode->i_lock); 760 spin_lock(&ci->i_ceph_lock);
761 if (ci->i_xattrs.prealloc_blob) 761 if (ci->i_xattrs.prealloc_blob)
762 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 762 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
763 ci->i_xattrs.prealloc_blob = blob; 763 ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
771 ci->i_xattrs.dirty = true; 771 ci->i_xattrs.dirty = true;
772 inode->i_ctime = CURRENT_TIME; 772 inode->i_ctime = CURRENT_TIME;
773 spin_unlock(&inode->i_lock); 773 spin_unlock(&ci->i_ceph_lock);
774 if (dirty) 774 if (dirty)
775 __mark_inode_dirty(inode, dirty); 775 __mark_inode_dirty(inode, dirty);
776 return err; 776 return err;
777 777
778do_sync: 778do_sync:
779 spin_unlock(&inode->i_lock); 779 spin_unlock(&ci->i_ceph_lock);
780 err = ceph_sync_setxattr(dentry, name, value, size, flags); 780 err = ceph_sync_setxattr(dentry, name, value, size, flags);
781out: 781out:
782 kfree(newname); 782 kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 return -EOPNOTSUPP; 833 return -EOPNOTSUPP;
834 } 834 }
835 835
836 spin_lock(&inode->i_lock); 836 spin_lock(&ci->i_ceph_lock);
837 __build_xattrs(inode); 837 __build_xattrs(inode);
838 issued = __ceph_caps_issued(ci, NULL); 838 issued = __ceph_caps_issued(ci, NULL);
839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
846 ci->i_xattrs.dirty = true; 846 ci->i_xattrs.dirty = true;
847 inode->i_ctime = CURRENT_TIME; 847 inode->i_ctime = CURRENT_TIME;
848 848
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 if (dirty) 850 if (dirty)
851 __mark_inode_dirty(inode, dirty); 851 __mark_inode_dirty(inode, dirty);
852 return err; 852 return err;
853do_sync: 853do_sync:
854 spin_unlock(&inode->i_lock); 854 spin_unlock(&ci->i_ceph_lock);
855 err = ceph_send_removexattr(dentry, name); 855 err = ceph_send_removexattr(dentry, name);
856 return err; 856 return err;
857} 857}