aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ceph/caps.c34
-rw-r--r--fs/ceph/file.c2
-rw-r--r--fs/ceph/inode.c155
-rw-r--r--fs/ceph/mds_client.c28
-rw-r--r--fs/ceph/quota.c9
-rw-r--r--fs/ceph/snap.c16
-rw-r--r--fs/ceph/super.c28
-rw-r--r--fs/ceph/super.h19
8 files changed, 156 insertions, 135 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 72f8e1311392..0176241eaea7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2738,15 +2738,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
2738 _got = 0; 2738 _got = 0;
2739 ret = try_get_cap_refs(ci, need, want, endoff, 2739 ret = try_get_cap_refs(ci, need, want, endoff,
2740 false, &_got); 2740 false, &_got);
2741 if (ret == -EAGAIN) { 2741 if (ret == -EAGAIN)
2742 continue; 2742 continue;
2743 } else if (!ret) { 2743 if (!ret) {
2744 int err;
2745
2746 DEFINE_WAIT_FUNC(wait, woken_wake_function); 2744 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2747 add_wait_queue(&ci->i_cap_wq, &wait); 2745 add_wait_queue(&ci->i_cap_wq, &wait);
2748 2746
2749 while (!(err = try_get_cap_refs(ci, need, want, endoff, 2747 while (!(ret = try_get_cap_refs(ci, need, want, endoff,
2750 true, &_got))) { 2748 true, &_got))) {
2751 if (signal_pending(current)) { 2749 if (signal_pending(current)) {
2752 ret = -ERESTARTSYS; 2750 ret = -ERESTARTSYS;
@@ -2756,14 +2754,16 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
2756 } 2754 }
2757 2755
2758 remove_wait_queue(&ci->i_cap_wq, &wait); 2756 remove_wait_queue(&ci->i_cap_wq, &wait);
2759 if (err == -EAGAIN) 2757 if (ret == -EAGAIN)
2760 continue; 2758 continue;
2761 } 2759 }
2762 if (ret == -ESTALE) { 2760 if (ret < 0) {
2763 /* session was killed, try renew caps */ 2761 if (ret == -ESTALE) {
2764 ret = ceph_renew_caps(&ci->vfs_inode); 2762 /* session was killed, try renew caps */
2765 if (ret == 0) 2763 ret = ceph_renew_caps(&ci->vfs_inode);
2766 continue; 2764 if (ret == 0)
2765 continue;
2766 }
2767 return ret; 2767 return ret;
2768 } 2768 }
2769 2769
@@ -2992,8 +2992,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2992 } 2992 }
2993 if (complete_capsnap) 2993 if (complete_capsnap)
2994 wake_up_all(&ci->i_cap_wq); 2994 wake_up_all(&ci->i_cap_wq);
2995 while (put-- > 0) 2995 while (put-- > 0) {
2996 iput(inode); 2996 /* avoid calling iput_final() in osd dispatch threads */
2997 ceph_async_iput(inode);
2998 }
2997} 2999}
2998 3000
2999/* 3001/*
@@ -3964,8 +3966,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3964done: 3966done:
3965 mutex_unlock(&session->s_mutex); 3967 mutex_unlock(&session->s_mutex);
3966done_unlocked: 3968done_unlocked:
3967 iput(inode);
3968 ceph_put_string(extra_info.pool_ns); 3969 ceph_put_string(extra_info.pool_ns);
3970 /* avoid calling iput_final() in mds dispatch threads */
3971 ceph_async_iput(inode);
3969 return; 3972 return;
3970 3973
3971flush_cap_releases: 3974flush_cap_releases:
@@ -4011,7 +4014,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
4011 if (inode) { 4014 if (inode) {
4012 dout("check_delayed_caps on %p\n", inode); 4015 dout("check_delayed_caps on %p\n", inode);
4013 ceph_check_caps(ci, flags, NULL); 4016 ceph_check_caps(ci, flags, NULL);
4014 iput(inode); 4017 /* avoid calling iput_final() in tick thread */
4018 ceph_async_iput(inode);
4015 } 4019 }
4016 } 4020 }
4017 spin_unlock(&mdsc->cap_delay_lock); 4021 spin_unlock(&mdsc->cap_delay_lock);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 305daf043eb0..183c37c0a8fc 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -791,7 +791,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
791 if (aio_work) { 791 if (aio_work) {
792 INIT_WORK(&aio_work->work, ceph_aio_retry_work); 792 INIT_WORK(&aio_work->work, ceph_aio_retry_work);
793 aio_work->req = req; 793 aio_work->req = req;
794 queue_work(ceph_inode_to_client(inode)->wb_wq, 794 queue_work(ceph_inode_to_client(inode)->inode_wq,
795 &aio_work->work); 795 &aio_work->work);
796 return; 796 return;
797 } 797 }
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index f85355bf49c4..761451f36e2d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -33,9 +33,7 @@
33 33
34static const struct inode_operations ceph_symlink_iops; 34static const struct inode_operations ceph_symlink_iops;
35 35
36static void ceph_invalidate_work(struct work_struct *work); 36static void ceph_inode_work(struct work_struct *work);
37static void ceph_writeback_work(struct work_struct *work);
38static void ceph_vmtruncate_work(struct work_struct *work);
39 37
40/* 38/*
41 * find or create an inode, given the ceph ino number 39 * find or create an inode, given the ceph ino number
@@ -509,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
509 INIT_LIST_HEAD(&ci->i_snap_realm_item); 507 INIT_LIST_HEAD(&ci->i_snap_realm_item);
510 INIT_LIST_HEAD(&ci->i_snap_flush_item); 508 INIT_LIST_HEAD(&ci->i_snap_flush_item);
511 509
512 INIT_WORK(&ci->i_wb_work, ceph_writeback_work); 510 INIT_WORK(&ci->i_work, ceph_inode_work);
513 INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); 511 ci->i_work_mask = 0;
514
515 INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work);
516 512
517 ceph_fscache_inode_init(ci); 513 ceph_fscache_inode_init(ci);
518 514
@@ -1480,7 +1476,8 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
1480 pr_err("fill_inode badness on %p got %d\n", in, rc); 1476 pr_err("fill_inode badness on %p got %d\n", in, rc);
1481 err = rc; 1477 err = rc;
1482 } 1478 }
1483 iput(in); 1479 /* avoid calling iput_final() in mds dispatch threads */
1480 ceph_async_iput(in);
1484 } 1481 }
1485 1482
1486 return err; 1483 return err;
@@ -1678,8 +1675,11 @@ retry_lookup:
1678 &req->r_caps_reservation); 1675 &req->r_caps_reservation);
1679 if (ret < 0) { 1676 if (ret < 0) {
1680 pr_err("fill_inode badness on %p\n", in); 1677 pr_err("fill_inode badness on %p\n", in);
1681 if (d_really_is_negative(dn)) 1678 if (d_really_is_negative(dn)) {
1682 iput(in); 1679 /* avoid calling iput_final() in mds
1680 * dispatch threads */
1681 ceph_async_iput(in);
1682 }
1683 d_drop(dn); 1683 d_drop(dn);
1684 err = ret; 1684 err = ret;
1685 goto next_item; 1685 goto next_item;
@@ -1689,7 +1689,7 @@ retry_lookup:
1689 if (ceph_security_xattr_deadlock(in)) { 1689 if (ceph_security_xattr_deadlock(in)) {
1690 dout(" skip splicing dn %p to inode %p" 1690 dout(" skip splicing dn %p to inode %p"
1691 " (security xattr deadlock)\n", dn, in); 1691 " (security xattr deadlock)\n", dn, in);
1692 iput(in); 1692 ceph_async_iput(in);
1693 skipped++; 1693 skipped++;
1694 goto next_item; 1694 goto next_item;
1695 } 1695 }
@@ -1741,56 +1741,86 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
1741} 1741}
1742 1742
1743/* 1743/*
1744 * Put reference to inode, but avoid calling iput_final() in current thread.
1745 * iput_final() may wait for readahead pages. The wait can cause deadlock in
1746 * some contexts.
1747 */
1748void ceph_async_iput(struct inode *inode)
1749{
1750 if (!inode)
1751 return;
1752 for (;;) {
1753 if (atomic_add_unless(&inode->i_count, -1, 1))
1754 break;
1755 if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1756 &ceph_inode(inode)->i_work))
1757 break;
1758 /* queue work failed, i_count must be at least 2 */
1759 }
1760}
1761
1762/*
1744 * Write back inode data in a worker thread. (This can't be done 1763 * Write back inode data in a worker thread. (This can't be done
1745 * in the message handler context.) 1764 * in the message handler context.)
1746 */ 1765 */
1747void ceph_queue_writeback(struct inode *inode) 1766void ceph_queue_writeback(struct inode *inode)
1748{ 1767{
1768 struct ceph_inode_info *ci = ceph_inode(inode);
1769 set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask);
1770
1749 ihold(inode); 1771 ihold(inode);
1750 if (queue_work(ceph_inode_to_client(inode)->wb_wq, 1772 if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1751 &ceph_inode(inode)->i_wb_work)) { 1773 &ci->i_work)) {
1752 dout("ceph_queue_writeback %p\n", inode); 1774 dout("ceph_queue_writeback %p\n", inode);
1753 } else { 1775 } else {
1754 dout("ceph_queue_writeback %p failed\n", inode); 1776 dout("ceph_queue_writeback %p already queued, mask=%lx\n",
1777 inode, ci->i_work_mask);
1755 iput(inode); 1778 iput(inode);
1756 } 1779 }
1757} 1780}
1758 1781
1759static void ceph_writeback_work(struct work_struct *work)
1760{
1761 struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1762 i_wb_work);
1763 struct inode *inode = &ci->vfs_inode;
1764
1765 dout("writeback %p\n", inode);
1766 filemap_fdatawrite(&inode->i_data);
1767 iput(inode);
1768}
1769
1770/* 1782/*
1771 * queue an async invalidation 1783 * queue an async invalidation
1772 */ 1784 */
1773void ceph_queue_invalidate(struct inode *inode) 1785void ceph_queue_invalidate(struct inode *inode)
1774{ 1786{
1787 struct ceph_inode_info *ci = ceph_inode(inode);
1788 set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask);
1789
1775 ihold(inode); 1790 ihold(inode);
1776 if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, 1791 if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1777 &ceph_inode(inode)->i_pg_inv_work)) { 1792 &ceph_inode(inode)->i_work)) {
1778 dout("ceph_queue_invalidate %p\n", inode); 1793 dout("ceph_queue_invalidate %p\n", inode);
1779 } else { 1794 } else {
1780 dout("ceph_queue_invalidate %p failed\n", inode); 1795 dout("ceph_queue_invalidate %p already queued, mask=%lx\n",
1796 inode, ci->i_work_mask);
1781 iput(inode); 1797 iput(inode);
1782 } 1798 }
1783} 1799}
1784 1800
1785/* 1801/*
1786 * Invalidate inode pages in a worker thread. (This can't be done 1802 * Queue an async vmtruncate. If we fail to queue work, we will handle
1787 * in the message handler context.) 1803 * the truncation the next time we call __ceph_do_pending_vmtruncate.
1788 */ 1804 */
1789static void ceph_invalidate_work(struct work_struct *work) 1805void ceph_queue_vmtruncate(struct inode *inode)
1790{ 1806{
1791 struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, 1807 struct ceph_inode_info *ci = ceph_inode(inode);
1792 i_pg_inv_work); 1808 set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask);
1793 struct inode *inode = &ci->vfs_inode; 1809
1810 ihold(inode);
1811 if (queue_work(ceph_inode_to_client(inode)->inode_wq,
1812 &ci->i_work)) {
1813 dout("ceph_queue_vmtruncate %p\n", inode);
1814 } else {
1815 dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n",
1816 inode, ci->i_work_mask);
1817 iput(inode);
1818 }
1819}
1820
1821static void ceph_do_invalidate_pages(struct inode *inode)
1822{
1823 struct ceph_inode_info *ci = ceph_inode(inode);
1794 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 1824 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1795 u32 orig_gen; 1825 u32 orig_gen;
1796 int check = 0; 1826 int check = 0;
@@ -1842,44 +1872,6 @@ static void ceph_invalidate_work(struct work_struct *work)
1842out: 1872out:
1843 if (check) 1873 if (check)
1844 ceph_check_caps(ci, 0, NULL); 1874 ceph_check_caps(ci, 0, NULL);
1845 iput(inode);
1846}
1847
1848
1849/*
1850 * called by trunc_wq;
1851 *
1852 * We also truncate in a separate thread as well.
1853 */
1854static void ceph_vmtruncate_work(struct work_struct *work)
1855{
1856 struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1857 i_vmtruncate_work);
1858 struct inode *inode = &ci->vfs_inode;
1859
1860 dout("vmtruncate_work %p\n", inode);
1861 __ceph_do_pending_vmtruncate(inode);
1862 iput(inode);
1863}
1864
1865/*
1866 * Queue an async vmtruncate. If we fail to queue work, we will handle
1867 * the truncation the next time we call __ceph_do_pending_vmtruncate.
1868 */
1869void ceph_queue_vmtruncate(struct inode *inode)
1870{
1871 struct ceph_inode_info *ci = ceph_inode(inode);
1872
1873 ihold(inode);
1874
1875 if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
1876 &ci->i_vmtruncate_work)) {
1877 dout("ceph_queue_vmtruncate %p\n", inode);
1878 } else {
1879 dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
1880 inode, ci->i_truncate_pending);
1881 iput(inode);
1882 }
1883} 1875}
1884 1876
1885/* 1877/*
@@ -1943,6 +1935,25 @@ retry:
1943 wake_up_all(&ci->i_cap_wq); 1935 wake_up_all(&ci->i_cap_wq);
1944} 1936}
1945 1937
1938static void ceph_inode_work(struct work_struct *work)
1939{
1940 struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
1941 i_work);
1942 struct inode *inode = &ci->vfs_inode;
1943
1944 if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
1945 dout("writeback %p\n", inode);
1946 filemap_fdatawrite(&inode->i_data);
1947 }
1948 if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask))
1949 ceph_do_invalidate_pages(inode);
1950
1951 if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask))
1952 __ceph_do_pending_vmtruncate(inode);
1953
1954 iput(inode);
1955}
1956
1946/* 1957/*
1947 * symlinks 1958 * symlinks
1948 */ 1959 */
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 959b1bf7c327..6af2d0d4a87a 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -690,11 +690,12 @@ void ceph_mdsc_release_request(struct kref *kref)
690 ceph_msg_put(req->r_reply); 690 ceph_msg_put(req->r_reply);
691 if (req->r_inode) { 691 if (req->r_inode) {
692 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 692 ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
693 iput(req->r_inode); 693 /* avoid calling iput_final() in mds dispatch threads */
694 ceph_async_iput(req->r_inode);
694 } 695 }
695 if (req->r_parent) 696 if (req->r_parent)
696 ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); 697 ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
697 iput(req->r_target_inode); 698 ceph_async_iput(req->r_target_inode);
698 if (req->r_dentry) 699 if (req->r_dentry)
699 dput(req->r_dentry); 700 dput(req->r_dentry);
700 if (req->r_old_dentry) 701 if (req->r_old_dentry)
@@ -708,7 +709,7 @@ void ceph_mdsc_release_request(struct kref *kref)
708 */ 709 */
709 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 710 ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
710 CEPH_CAP_PIN); 711 CEPH_CAP_PIN);
711 iput(req->r_old_dentry_dir); 712 ceph_async_iput(req->r_old_dentry_dir);
712 } 713 }
713 kfree(req->r_path1); 714 kfree(req->r_path1);
714 kfree(req->r_path2); 715 kfree(req->r_path2);
@@ -818,7 +819,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
818 } 819 }
819 820
820 if (req->r_unsafe_dir) { 821 if (req->r_unsafe_dir) {
821 iput(req->r_unsafe_dir); 822 /* avoid calling iput_final() in mds dispatch threads */
823 ceph_async_iput(req->r_unsafe_dir);
822 req->r_unsafe_dir = NULL; 824 req->r_unsafe_dir = NULL;
823 } 825 }
824 826
@@ -983,7 +985,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
983 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 985 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
984 if (!cap) { 986 if (!cap) {
985 spin_unlock(&ci->i_ceph_lock); 987 spin_unlock(&ci->i_ceph_lock);
986 iput(inode); 988 ceph_async_iput(inode);
987 goto random; 989 goto random;
988 } 990 }
989 mds = cap->session->s_mds; 991 mds = cap->session->s_mds;
@@ -992,7 +994,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
992 cap == ci->i_auth_cap ? "auth " : "", cap); 994 cap == ci->i_auth_cap ? "auth " : "", cap);
993 spin_unlock(&ci->i_ceph_lock); 995 spin_unlock(&ci->i_ceph_lock);
994out: 996out:
995 iput(inode); 997 /* avoid calling iput_final() while holding mdsc->mutex or
998 * in mds dispatch threads */
999 ceph_async_iput(inode);
996 return mds; 1000 return mds;
997 1001
998random: 1002random:
@@ -1302,7 +1306,9 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session,
1302 spin_unlock(&session->s_cap_lock); 1306 spin_unlock(&session->s_cap_lock);
1303 1307
1304 if (last_inode) { 1308 if (last_inode) {
1305 iput(last_inode); 1309 /* avoid calling iput_final() while holding
1310 * s_mutex or in mds dispatch threads */
1311 ceph_async_iput(last_inode);
1306 last_inode = NULL; 1312 last_inode = NULL;
1307 } 1313 }
1308 if (old_cap) { 1314 if (old_cap) {
@@ -1335,7 +1341,7 @@ out:
1335 session->s_cap_iterator = NULL; 1341 session->s_cap_iterator = NULL;
1336 spin_unlock(&session->s_cap_lock); 1342 spin_unlock(&session->s_cap_lock);
1337 1343
1338 iput(last_inode); 1344 ceph_async_iput(last_inode);
1339 if (old_cap) 1345 if (old_cap)
1340 ceph_put_cap(session->s_mdsc, old_cap); 1346 ceph_put_cap(session->s_mdsc, old_cap);
1341 1347
@@ -1471,7 +1477,8 @@ static void remove_session_caps(struct ceph_mds_session *session)
1471 spin_unlock(&session->s_cap_lock); 1477 spin_unlock(&session->s_cap_lock);
1472 1478
1473 inode = ceph_find_inode(sb, vino); 1479 inode = ceph_find_inode(sb, vino);
1474 iput(inode); 1480 /* avoid calling iput_final() while holding s_mutex */
1481 ceph_async_iput(inode);
1475 1482
1476 spin_lock(&session->s_cap_lock); 1483 spin_lock(&session->s_cap_lock);
1477 } 1484 }
@@ -3912,8 +3919,9 @@ release:
3912 ceph_con_send(&session->s_con, msg); 3919 ceph_con_send(&session->s_con, msg);
3913 3920
3914out: 3921out:
3915 iput(inode);
3916 mutex_unlock(&session->s_mutex); 3922 mutex_unlock(&session->s_mutex);
3923 /* avoid calling iput_final() in mds dispatch threads */
3924 ceph_async_iput(inode);
3917 return; 3925 return;
3918 3926
3919bad: 3927bad:
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index c4522212872c..d629fc857450 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -74,7 +74,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
74 le64_to_cpu(h->max_files)); 74 le64_to_cpu(h->max_files));
75 spin_unlock(&ci->i_ceph_lock); 75 spin_unlock(&ci->i_ceph_lock);
76 76
77 iput(inode); 77 /* avoid calling iput_final() in dispatch thread */
78 ceph_async_iput(inode);
78} 79}
79 80
80static struct ceph_quotarealm_inode * 81static struct ceph_quotarealm_inode *
@@ -235,7 +236,8 @@ restart:
235 236
236 ci = ceph_inode(in); 237 ci = ceph_inode(in);
237 has_quota = __ceph_has_any_quota(ci); 238 has_quota = __ceph_has_any_quota(ci);
238 iput(in); 239 /* avoid calling iput_final() while holding mdsc->snap_rwsem */
240 ceph_async_iput(in);
239 241
240 next = realm->parent; 242 next = realm->parent;
241 if (has_quota || !next) 243 if (has_quota || !next)
@@ -372,7 +374,8 @@ restart:
372 pr_warn("Invalid quota check op (%d)\n", op); 374 pr_warn("Invalid quota check op (%d)\n", op);
373 exceeded = true; /* Just break the loop */ 375 exceeded = true; /* Just break the loop */
374 } 376 }
375 iput(in); 377 /* avoid calling iput_final() while holding mdsc->snap_rwsem */
378 ceph_async_iput(in);
376 379
377 next = realm->parent; 380 next = realm->parent;
378 if (exceeded || !next) 381 if (exceeded || !next)
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b26e12cd8ec3..72c6c022f02b 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -648,13 +648,15 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
648 if (!inode) 648 if (!inode)
649 continue; 649 continue;
650 spin_unlock(&realm->inodes_with_caps_lock); 650 spin_unlock(&realm->inodes_with_caps_lock);
651 iput(lastinode); 651 /* avoid calling iput_final() while holding
652 * mdsc->snap_rwsem or in mds dispatch threads */
653 ceph_async_iput(lastinode);
652 lastinode = inode; 654 lastinode = inode;
653 ceph_queue_cap_snap(ci); 655 ceph_queue_cap_snap(ci);
654 spin_lock(&realm->inodes_with_caps_lock); 656 spin_lock(&realm->inodes_with_caps_lock);
655 } 657 }
656 spin_unlock(&realm->inodes_with_caps_lock); 658 spin_unlock(&realm->inodes_with_caps_lock);
657 iput(lastinode); 659 ceph_async_iput(lastinode);
658 660
659 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); 661 dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino);
660} 662}
@@ -806,7 +808,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
806 ihold(inode); 808 ihold(inode);
807 spin_unlock(&mdsc->snap_flush_lock); 809 spin_unlock(&mdsc->snap_flush_lock);
808 ceph_flush_snaps(ci, &session); 810 ceph_flush_snaps(ci, &session);
809 iput(inode); 811 /* avoid calling iput_final() while holding
812 * session->s_mutex or in mds dispatch threads */
813 ceph_async_iput(inode);
810 spin_lock(&mdsc->snap_flush_lock); 814 spin_lock(&mdsc->snap_flush_lock);
811 } 815 }
812 spin_unlock(&mdsc->snap_flush_lock); 816 spin_unlock(&mdsc->snap_flush_lock);
@@ -950,12 +954,14 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
950 ceph_get_snap_realm(mdsc, realm); 954 ceph_get_snap_realm(mdsc, realm);
951 ceph_put_snap_realm(mdsc, oldrealm); 955 ceph_put_snap_realm(mdsc, oldrealm);
952 956
953 iput(inode); 957 /* avoid calling iput_final() while holding
958 * mdsc->snap_rwsem or in mds dispatch threads */
959 ceph_async_iput(inode);
954 continue; 960 continue;
955 961
956skip_inode: 962skip_inode:
957 spin_unlock(&ci->i_ceph_lock); 963 spin_unlock(&ci->i_ceph_lock);
958 iput(inode); 964 ceph_async_iput(inode);
959 } 965 }
960 966
961 /* we may have taken some of the old realm's children. */ 967 /* we may have taken some of the old realm's children. */
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 01be7c1bc4c6..d57fa60dcd43 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -672,18 +672,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
672 * The number of concurrent works can be high but they don't need 672 * The number of concurrent works can be high but they don't need
673 * to be processed in parallel, limit concurrency. 673 * to be processed in parallel, limit concurrency.
674 */ 674 */
675 fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); 675 fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0);
676 if (!fsc->wb_wq) 676 if (!fsc->inode_wq)
677 goto fail_client; 677 goto fail_client;
678 fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
679 if (!fsc->pg_inv_wq)
680 goto fail_wb_wq;
681 fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
682 if (!fsc->trunc_wq)
683 goto fail_pg_inv_wq;
684 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); 678 fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1);
685 if (!fsc->cap_wq) 679 if (!fsc->cap_wq)
686 goto fail_trunc_wq; 680 goto fail_inode_wq;
687 681
688 /* set up mempools */ 682 /* set up mempools */
689 err = -ENOMEM; 683 err = -ENOMEM;
@@ -697,12 +691,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
697 691
698fail_cap_wq: 692fail_cap_wq:
699 destroy_workqueue(fsc->cap_wq); 693 destroy_workqueue(fsc->cap_wq);
700fail_trunc_wq: 694fail_inode_wq:
701 destroy_workqueue(fsc->trunc_wq); 695 destroy_workqueue(fsc->inode_wq);
702fail_pg_inv_wq:
703 destroy_workqueue(fsc->pg_inv_wq);
704fail_wb_wq:
705 destroy_workqueue(fsc->wb_wq);
706fail_client: 696fail_client:
707 ceph_destroy_client(fsc->client); 697 ceph_destroy_client(fsc->client);
708fail: 698fail:
@@ -715,9 +705,7 @@ fail:
715 705
716static void flush_fs_workqueues(struct ceph_fs_client *fsc) 706static void flush_fs_workqueues(struct ceph_fs_client *fsc)
717{ 707{
718 flush_workqueue(fsc->wb_wq); 708 flush_workqueue(fsc->inode_wq);
719 flush_workqueue(fsc->pg_inv_wq);
720 flush_workqueue(fsc->trunc_wq);
721 flush_workqueue(fsc->cap_wq); 709 flush_workqueue(fsc->cap_wq);
722} 710}
723 711
@@ -725,9 +713,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
725{ 713{
726 dout("destroy_fs_client %p\n", fsc); 714 dout("destroy_fs_client %p\n", fsc);
727 715
728 destroy_workqueue(fsc->wb_wq); 716 destroy_workqueue(fsc->inode_wq);
729 destroy_workqueue(fsc->pg_inv_wq);
730 destroy_workqueue(fsc->trunc_wq);
731 destroy_workqueue(fsc->cap_wq); 717 destroy_workqueue(fsc->cap_wq);
732 718
733 mempool_destroy(fsc->wb_pagevec_pool); 719 mempool_destroy(fsc->wb_pagevec_pool);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 6edab9a750f8..5f27e1f7f2d6 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -109,9 +109,7 @@ struct ceph_fs_client {
109 mempool_t *wb_pagevec_pool; 109 mempool_t *wb_pagevec_pool;
110 atomic_long_t writeback_count; 110 atomic_long_t writeback_count;
111 111
112 struct workqueue_struct *wb_wq; 112 struct workqueue_struct *inode_wq;
113 struct workqueue_struct *pg_inv_wq;
114 struct workqueue_struct *trunc_wq;
115 struct workqueue_struct *cap_wq; 113 struct workqueue_struct *cap_wq;
116 114
117#ifdef CONFIG_DEBUG_FS 115#ifdef CONFIG_DEBUG_FS
@@ -387,10 +385,8 @@ struct ceph_inode_info {
387 struct list_head i_snap_realm_item; 385 struct list_head i_snap_realm_item;
388 struct list_head i_snap_flush_item; 386 struct list_head i_snap_flush_item;
389 387
390 struct work_struct i_wb_work; /* writeback work */ 388 struct work_struct i_work;
391 struct work_struct i_pg_inv_work; /* page invalidation work */ 389 unsigned long i_work_mask;
392
393 struct work_struct i_vmtruncate_work;
394 390
395#ifdef CONFIG_CEPH_FSCACHE 391#ifdef CONFIG_CEPH_FSCACHE
396 struct fscache_cookie *fscache; 392 struct fscache_cookie *fscache;
@@ -513,6 +509,13 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
513 509
514 510
515/* 511/*
512 * Masks of ceph inode work.
513 */
514#define CEPH_I_WORK_WRITEBACK 0 /* writeback */
515#define CEPH_I_WORK_INVALIDATE_PAGES 1 /* invalidate pages */
516#define CEPH_I_WORK_VMTRUNCATE 2 /* vmtruncate */
517
518/*
516 * We set the ERROR_WRITE bit when we start seeing write errors on an inode 519 * We set the ERROR_WRITE bit when we start seeing write errors on an inode
517 * and then clear it when they start succeeding. Note that we do a lockless 520 * and then clear it when they start succeeding. Note that we do a lockless
518 * check first, and only take the lock if it looks like it needs to be changed. 521 * check first, and only take the lock if it looks like it needs to be changed.
@@ -896,9 +899,9 @@ extern int ceph_inode_holds_cap(struct inode *inode, int mask);
896extern bool ceph_inode_set_size(struct inode *inode, loff_t size); 899extern bool ceph_inode_set_size(struct inode *inode, loff_t size);
897extern void __ceph_do_pending_vmtruncate(struct inode *inode); 900extern void __ceph_do_pending_vmtruncate(struct inode *inode);
898extern void ceph_queue_vmtruncate(struct inode *inode); 901extern void ceph_queue_vmtruncate(struct inode *inode);
899
900extern void ceph_queue_invalidate(struct inode *inode); 902extern void ceph_queue_invalidate(struct inode *inode);
901extern void ceph_queue_writeback(struct inode *inode); 903extern void ceph_queue_writeback(struct inode *inode);
904extern void ceph_async_iput(struct inode *inode);
902 905
903extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, 906extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
904 int mask, bool force); 907 int mask, bool force);