diff options
| -rw-r--r-- | fs/ceph/caps.c | 34 | ||||
| -rw-r--r-- | fs/ceph/file.c | 2 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 155 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 28 | ||||
| -rw-r--r-- | fs/ceph/quota.c | 9 | ||||
| -rw-r--r-- | fs/ceph/snap.c | 16 | ||||
| -rw-r--r-- | fs/ceph/super.c | 28 | ||||
| -rw-r--r-- | fs/ceph/super.h | 19 |
8 files changed, 156 insertions, 135 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 72f8e1311392..0176241eaea7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -2738,15 +2738,13 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
| 2738 | _got = 0; | 2738 | _got = 0; |
| 2739 | ret = try_get_cap_refs(ci, need, want, endoff, | 2739 | ret = try_get_cap_refs(ci, need, want, endoff, |
| 2740 | false, &_got); | 2740 | false, &_got); |
| 2741 | if (ret == -EAGAIN) { | 2741 | if (ret == -EAGAIN) |
| 2742 | continue; | 2742 | continue; |
| 2743 | } else if (!ret) { | 2743 | if (!ret) { |
| 2744 | int err; | ||
| 2745 | |||
| 2746 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | 2744 | DEFINE_WAIT_FUNC(wait, woken_wake_function); |
| 2747 | add_wait_queue(&ci->i_cap_wq, &wait); | 2745 | add_wait_queue(&ci->i_cap_wq, &wait); |
| 2748 | 2746 | ||
| 2749 | while (!(err = try_get_cap_refs(ci, need, want, endoff, | 2747 | while (!(ret = try_get_cap_refs(ci, need, want, endoff, |
| 2750 | true, &_got))) { | 2748 | true, &_got))) { |
| 2751 | if (signal_pending(current)) { | 2749 | if (signal_pending(current)) { |
| 2752 | ret = -ERESTARTSYS; | 2750 | ret = -ERESTARTSYS; |
| @@ -2756,14 +2754,16 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, | |||
| 2756 | } | 2754 | } |
| 2757 | 2755 | ||
| 2758 | remove_wait_queue(&ci->i_cap_wq, &wait); | 2756 | remove_wait_queue(&ci->i_cap_wq, &wait); |
| 2759 | if (err == -EAGAIN) | 2757 | if (ret == -EAGAIN) |
| 2760 | continue; | 2758 | continue; |
| 2761 | } | 2759 | } |
| 2762 | if (ret == -ESTALE) { | 2760 | if (ret < 0) { |
| 2763 | /* session was killed, try renew caps */ | 2761 | if (ret == -ESTALE) { |
| 2764 | ret = ceph_renew_caps(&ci->vfs_inode); | 2762 | /* session was killed, try renew caps */ |
| 2765 | if (ret == 0) | 2763 | ret = ceph_renew_caps(&ci->vfs_inode); |
| 2766 | continue; | 2764 | if (ret == 0) |
| 2765 | continue; | ||
| 2766 | } | ||
| 2767 | return ret; | 2767 | return ret; |
| 2768 | } | 2768 | } |
| 2769 | 2769 | ||
| @@ -2992,8 +2992,10 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | |||
| 2992 | } | 2992 | } |
| 2993 | if (complete_capsnap) | 2993 | if (complete_capsnap) |
| 2994 | wake_up_all(&ci->i_cap_wq); | 2994 | wake_up_all(&ci->i_cap_wq); |
| 2995 | while (put-- > 0) | 2995 | while (put-- > 0) { |
| 2996 | iput(inode); | 2996 | /* avoid calling iput_final() in osd dispatch threads */ |
| 2997 | ceph_async_iput(inode); | ||
| 2998 | } | ||
| 2997 | } | 2999 | } |
| 2998 | 3000 | ||
| 2999 | /* | 3001 | /* |
| @@ -3964,8 +3966,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 3964 | done: | 3966 | done: |
| 3965 | mutex_unlock(&session->s_mutex); | 3967 | mutex_unlock(&session->s_mutex); |
| 3966 | done_unlocked: | 3968 | done_unlocked: |
| 3967 | iput(inode); | ||
| 3968 | ceph_put_string(extra_info.pool_ns); | 3969 | ceph_put_string(extra_info.pool_ns); |
| 3970 | /* avoid calling iput_final() in mds dispatch threads */ | ||
| 3971 | ceph_async_iput(inode); | ||
| 3969 | return; | 3972 | return; |
| 3970 | 3973 | ||
| 3971 | flush_cap_releases: | 3974 | flush_cap_releases: |
| @@ -4011,7 +4014,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | |||
| 4011 | if (inode) { | 4014 | if (inode) { |
| 4012 | dout("check_delayed_caps on %p\n", inode); | 4015 | dout("check_delayed_caps on %p\n", inode); |
| 4013 | ceph_check_caps(ci, flags, NULL); | 4016 | ceph_check_caps(ci, flags, NULL); |
| 4014 | iput(inode); | 4017 | /* avoid calling iput_final() in tick thread */ |
| 4018 | ceph_async_iput(inode); | ||
| 4015 | } | 4019 | } |
| 4016 | } | 4020 | } |
| 4017 | spin_unlock(&mdsc->cap_delay_lock); | 4021 | spin_unlock(&mdsc->cap_delay_lock); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 305daf043eb0..183c37c0a8fc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
| @@ -791,7 +791,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) | |||
| 791 | if (aio_work) { | 791 | if (aio_work) { |
| 792 | INIT_WORK(&aio_work->work, ceph_aio_retry_work); | 792 | INIT_WORK(&aio_work->work, ceph_aio_retry_work); |
| 793 | aio_work->req = req; | 793 | aio_work->req = req; |
| 794 | queue_work(ceph_inode_to_client(inode)->wb_wq, | 794 | queue_work(ceph_inode_to_client(inode)->inode_wq, |
| 795 | &aio_work->work); | 795 | &aio_work->work); |
| 796 | return; | 796 | return; |
| 797 | } | 797 | } |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index f85355bf49c4..761451f36e2d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -33,9 +33,7 @@ | |||
| 33 | 33 | ||
| 34 | static const struct inode_operations ceph_symlink_iops; | 34 | static const struct inode_operations ceph_symlink_iops; |
| 35 | 35 | ||
| 36 | static void ceph_invalidate_work(struct work_struct *work); | 36 | static void ceph_inode_work(struct work_struct *work); |
| 37 | static void ceph_writeback_work(struct work_struct *work); | ||
| 38 | static void ceph_vmtruncate_work(struct work_struct *work); | ||
| 39 | 37 | ||
| 40 | /* | 38 | /* |
| 41 | * find or create an inode, given the ceph ino number | 39 | * find or create an inode, given the ceph ino number |
| @@ -509,10 +507,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) | |||
| 509 | INIT_LIST_HEAD(&ci->i_snap_realm_item); | 507 | INIT_LIST_HEAD(&ci->i_snap_realm_item); |
| 510 | INIT_LIST_HEAD(&ci->i_snap_flush_item); | 508 | INIT_LIST_HEAD(&ci->i_snap_flush_item); |
| 511 | 509 | ||
| 512 | INIT_WORK(&ci->i_wb_work, ceph_writeback_work); | 510 | INIT_WORK(&ci->i_work, ceph_inode_work); |
| 513 | INIT_WORK(&ci->i_pg_inv_work, ceph_invalidate_work); | 511 | ci->i_work_mask = 0; |
| 514 | |||
| 515 | INIT_WORK(&ci->i_vmtruncate_work, ceph_vmtruncate_work); | ||
| 516 | 512 | ||
| 517 | ceph_fscache_inode_init(ci); | 513 | ceph_fscache_inode_init(ci); |
| 518 | 514 | ||
| @@ -1480,7 +1476,8 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, | |||
| 1480 | pr_err("fill_inode badness on %p got %d\n", in, rc); | 1476 | pr_err("fill_inode badness on %p got %d\n", in, rc); |
| 1481 | err = rc; | 1477 | err = rc; |
| 1482 | } | 1478 | } |
| 1483 | iput(in); | 1479 | /* avoid calling iput_final() in mds dispatch threads */ |
| 1480 | ceph_async_iput(in); | ||
| 1484 | } | 1481 | } |
| 1485 | 1482 | ||
| 1486 | return err; | 1483 | return err; |
| @@ -1678,8 +1675,11 @@ retry_lookup: | |||
| 1678 | &req->r_caps_reservation); | 1675 | &req->r_caps_reservation); |
| 1679 | if (ret < 0) { | 1676 | if (ret < 0) { |
| 1680 | pr_err("fill_inode badness on %p\n", in); | 1677 | pr_err("fill_inode badness on %p\n", in); |
| 1681 | if (d_really_is_negative(dn)) | 1678 | if (d_really_is_negative(dn)) { |
| 1682 | iput(in); | 1679 | /* avoid calling iput_final() in mds |
| 1680 | * dispatch threads */ | ||
| 1681 | ceph_async_iput(in); | ||
| 1682 | } | ||
| 1683 | d_drop(dn); | 1683 | d_drop(dn); |
| 1684 | err = ret; | 1684 | err = ret; |
| 1685 | goto next_item; | 1685 | goto next_item; |
| @@ -1689,7 +1689,7 @@ retry_lookup: | |||
| 1689 | if (ceph_security_xattr_deadlock(in)) { | 1689 | if (ceph_security_xattr_deadlock(in)) { |
| 1690 | dout(" skip splicing dn %p to inode %p" | 1690 | dout(" skip splicing dn %p to inode %p" |
| 1691 | " (security xattr deadlock)\n", dn, in); | 1691 | " (security xattr deadlock)\n", dn, in); |
| 1692 | iput(in); | 1692 | ceph_async_iput(in); |
| 1693 | skipped++; | 1693 | skipped++; |
| 1694 | goto next_item; | 1694 | goto next_item; |
| 1695 | } | 1695 | } |
| @@ -1741,56 +1741,86 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size) | |||
| 1741 | } | 1741 | } |
| 1742 | 1742 | ||
| 1743 | /* | 1743 | /* |
| 1744 | * Put reference to inode, but avoid calling iput_final() in current thread. | ||
| 1745 | * iput_final() may wait for readahead pages. The wait can cause deadlock in | ||
| 1746 | * some contexts. | ||
| 1747 | */ | ||
| 1748 | void ceph_async_iput(struct inode *inode) | ||
| 1749 | { | ||
| 1750 | if (!inode) | ||
| 1751 | return; | ||
| 1752 | for (;;) { | ||
| 1753 | if (atomic_add_unless(&inode->i_count, -1, 1)) | ||
| 1754 | break; | ||
| 1755 | if (queue_work(ceph_inode_to_client(inode)->inode_wq, | ||
| 1756 | &ceph_inode(inode)->i_work)) | ||
| 1757 | break; | ||
| 1758 | /* queue work failed, i_count must be at least 2 */ | ||
| 1759 | } | ||
| 1760 | } | ||
| 1761 | |||
| 1762 | /* | ||
| 1744 | * Write back inode data in a worker thread. (This can't be done | 1763 | * Write back inode data in a worker thread. (This can't be done |
| 1745 | * in the message handler context.) | 1764 | * in the message handler context.) |
| 1746 | */ | 1765 | */ |
| 1747 | void ceph_queue_writeback(struct inode *inode) | 1766 | void ceph_queue_writeback(struct inode *inode) |
| 1748 | { | 1767 | { |
| 1768 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1769 | set_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask); | ||
| 1770 | |||
| 1749 | ihold(inode); | 1771 | ihold(inode); |
| 1750 | if (queue_work(ceph_inode_to_client(inode)->wb_wq, | 1772 | if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
| 1751 | &ceph_inode(inode)->i_wb_work)) { | 1773 | &ci->i_work)) { |
| 1752 | dout("ceph_queue_writeback %p\n", inode); | 1774 | dout("ceph_queue_writeback %p\n", inode); |
| 1753 | } else { | 1775 | } else { |
| 1754 | dout("ceph_queue_writeback %p failed\n", inode); | 1776 | dout("ceph_queue_writeback %p already queued, mask=%lx\n", |
| 1777 | inode, ci->i_work_mask); | ||
| 1755 | iput(inode); | 1778 | iput(inode); |
| 1756 | } | 1779 | } |
| 1757 | } | 1780 | } |
| 1758 | 1781 | ||
| 1759 | static void ceph_writeback_work(struct work_struct *work) | ||
| 1760 | { | ||
| 1761 | struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, | ||
| 1762 | i_wb_work); | ||
| 1763 | struct inode *inode = &ci->vfs_inode; | ||
| 1764 | |||
| 1765 | dout("writeback %p\n", inode); | ||
| 1766 | filemap_fdatawrite(&inode->i_data); | ||
| 1767 | iput(inode); | ||
| 1768 | } | ||
| 1769 | |||
| 1770 | /* | 1782 | /* |
| 1771 | * queue an async invalidation | 1783 | * queue an async invalidation |
| 1772 | */ | 1784 | */ |
| 1773 | void ceph_queue_invalidate(struct inode *inode) | 1785 | void ceph_queue_invalidate(struct inode *inode) |
| 1774 | { | 1786 | { |
| 1787 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1788 | set_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask); | ||
| 1789 | |||
| 1775 | ihold(inode); | 1790 | ihold(inode); |
| 1776 | if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq, | 1791 | if (queue_work(ceph_inode_to_client(inode)->inode_wq, |
| 1777 | &ceph_inode(inode)->i_pg_inv_work)) { | 1792 | &ceph_inode(inode)->i_work)) { |
| 1778 | dout("ceph_queue_invalidate %p\n", inode); | 1793 | dout("ceph_queue_invalidate %p\n", inode); |
| 1779 | } else { | 1794 | } else { |
| 1780 | dout("ceph_queue_invalidate %p failed\n", inode); | 1795 | dout("ceph_queue_invalidate %p already queued, mask=%lx\n", |
| 1796 | inode, ci->i_work_mask); | ||
| 1781 | iput(inode); | 1797 | iput(inode); |
| 1782 | } | 1798 | } |
| 1783 | } | 1799 | } |
| 1784 | 1800 | ||
| 1785 | /* | 1801 | /* |
| 1786 | * Invalidate inode pages in a worker thread. (This can't be done | 1802 | * Queue an async vmtruncate. If we fail to queue work, we will handle |
| 1787 | * in the message handler context.) | 1803 | * the truncation the next time we call __ceph_do_pending_vmtruncate. |
| 1788 | */ | 1804 | */ |
| 1789 | static void ceph_invalidate_work(struct work_struct *work) | 1805 | void ceph_queue_vmtruncate(struct inode *inode) |
| 1790 | { | 1806 | { |
| 1791 | struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, | 1807 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 1792 | i_pg_inv_work); | 1808 | set_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask); |
| 1793 | struct inode *inode = &ci->vfs_inode; | 1809 | |
| 1810 | ihold(inode); | ||
| 1811 | if (queue_work(ceph_inode_to_client(inode)->inode_wq, | ||
| 1812 | &ci->i_work)) { | ||
| 1813 | dout("ceph_queue_vmtruncate %p\n", inode); | ||
| 1814 | } else { | ||
| 1815 | dout("ceph_queue_vmtruncate %p already queued, mask=%lx\n", | ||
| 1816 | inode, ci->i_work_mask); | ||
| 1817 | iput(inode); | ||
| 1818 | } | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | static void ceph_do_invalidate_pages(struct inode *inode) | ||
| 1822 | { | ||
| 1823 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1794 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 1824 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 1795 | u32 orig_gen; | 1825 | u32 orig_gen; |
| 1796 | int check = 0; | 1826 | int check = 0; |
| @@ -1842,44 +1872,6 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
| 1842 | out: | 1872 | out: |
| 1843 | if (check) | 1873 | if (check) |
| 1844 | ceph_check_caps(ci, 0, NULL); | 1874 | ceph_check_caps(ci, 0, NULL); |
| 1845 | iput(inode); | ||
| 1846 | } | ||
| 1847 | |||
| 1848 | |||
| 1849 | /* | ||
| 1850 | * called by trunc_wq; | ||
| 1851 | * | ||
| 1852 | * We also truncate in a separate thread as well. | ||
| 1853 | */ | ||
| 1854 | static void ceph_vmtruncate_work(struct work_struct *work) | ||
| 1855 | { | ||
| 1856 | struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, | ||
| 1857 | i_vmtruncate_work); | ||
| 1858 | struct inode *inode = &ci->vfs_inode; | ||
| 1859 | |||
| 1860 | dout("vmtruncate_work %p\n", inode); | ||
| 1861 | __ceph_do_pending_vmtruncate(inode); | ||
| 1862 | iput(inode); | ||
| 1863 | } | ||
| 1864 | |||
| 1865 | /* | ||
| 1866 | * Queue an async vmtruncate. If we fail to queue work, we will handle | ||
| 1867 | * the truncation the next time we call __ceph_do_pending_vmtruncate. | ||
| 1868 | */ | ||
| 1869 | void ceph_queue_vmtruncate(struct inode *inode) | ||
| 1870 | { | ||
| 1871 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
| 1872 | |||
| 1873 | ihold(inode); | ||
| 1874 | |||
| 1875 | if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq, | ||
| 1876 | &ci->i_vmtruncate_work)) { | ||
| 1877 | dout("ceph_queue_vmtruncate %p\n", inode); | ||
| 1878 | } else { | ||
| 1879 | dout("ceph_queue_vmtruncate %p failed, pending=%d\n", | ||
| 1880 | inode, ci->i_truncate_pending); | ||
| 1881 | iput(inode); | ||
| 1882 | } | ||
| 1883 | } | 1875 | } |
| 1884 | 1876 | ||
| 1885 | /* | 1877 | /* |
| @@ -1943,6 +1935,25 @@ retry: | |||
| 1943 | wake_up_all(&ci->i_cap_wq); | 1935 | wake_up_all(&ci->i_cap_wq); |
| 1944 | } | 1936 | } |
| 1945 | 1937 | ||
| 1938 | static void ceph_inode_work(struct work_struct *work) | ||
| 1939 | { | ||
| 1940 | struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, | ||
| 1941 | i_work); | ||
| 1942 | struct inode *inode = &ci->vfs_inode; | ||
| 1943 | |||
| 1944 | if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { | ||
| 1945 | dout("writeback %p\n", inode); | ||
| 1946 | filemap_fdatawrite(&inode->i_data); | ||
| 1947 | } | ||
| 1948 | if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) | ||
| 1949 | ceph_do_invalidate_pages(inode); | ||
| 1950 | |||
| 1951 | if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask)) | ||
| 1952 | __ceph_do_pending_vmtruncate(inode); | ||
| 1953 | |||
| 1954 | iput(inode); | ||
| 1955 | } | ||
| 1956 | |||
| 1946 | /* | 1957 | /* |
| 1947 | * symlinks | 1958 | * symlinks |
| 1948 | */ | 1959 | */ |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 959b1bf7c327..6af2d0d4a87a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -690,11 +690,12 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
| 690 | ceph_msg_put(req->r_reply); | 690 | ceph_msg_put(req->r_reply); |
| 691 | if (req->r_inode) { | 691 | if (req->r_inode) { |
| 692 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 692 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
| 693 | iput(req->r_inode); | 693 | /* avoid calling iput_final() in mds dispatch threads */ |
| 694 | ceph_async_iput(req->r_inode); | ||
| 694 | } | 695 | } |
| 695 | if (req->r_parent) | 696 | if (req->r_parent) |
| 696 | ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); | 697 | ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); |
| 697 | iput(req->r_target_inode); | 698 | ceph_async_iput(req->r_target_inode); |
| 698 | if (req->r_dentry) | 699 | if (req->r_dentry) |
| 699 | dput(req->r_dentry); | 700 | dput(req->r_dentry); |
| 700 | if (req->r_old_dentry) | 701 | if (req->r_old_dentry) |
| @@ -708,7 +709,7 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
| 708 | */ | 709 | */ |
| 709 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), | 710 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), |
| 710 | CEPH_CAP_PIN); | 711 | CEPH_CAP_PIN); |
| 711 | iput(req->r_old_dentry_dir); | 712 | ceph_async_iput(req->r_old_dentry_dir); |
| 712 | } | 713 | } |
| 713 | kfree(req->r_path1); | 714 | kfree(req->r_path1); |
| 714 | kfree(req->r_path2); | 715 | kfree(req->r_path2); |
| @@ -818,7 +819,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
| 818 | } | 819 | } |
| 819 | 820 | ||
| 820 | if (req->r_unsafe_dir) { | 821 | if (req->r_unsafe_dir) { |
| 821 | iput(req->r_unsafe_dir); | 822 | /* avoid calling iput_final() in mds dispatch threads */ |
| 823 | ceph_async_iput(req->r_unsafe_dir); | ||
| 822 | req->r_unsafe_dir = NULL; | 824 | req->r_unsafe_dir = NULL; |
| 823 | } | 825 | } |
| 824 | 826 | ||
| @@ -983,7 +985,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
| 983 | cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); | 985 | cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); |
| 984 | if (!cap) { | 986 | if (!cap) { |
| 985 | spin_unlock(&ci->i_ceph_lock); | 987 | spin_unlock(&ci->i_ceph_lock); |
| 986 | iput(inode); | 988 | ceph_async_iput(inode); |
| 987 | goto random; | 989 | goto random; |
| 988 | } | 990 | } |
| 989 | mds = cap->session->s_mds; | 991 | mds = cap->session->s_mds; |
| @@ -992,7 +994,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
| 992 | cap == ci->i_auth_cap ? "auth " : "", cap); | 994 | cap == ci->i_auth_cap ? "auth " : "", cap); |
| 993 | spin_unlock(&ci->i_ceph_lock); | 995 | spin_unlock(&ci->i_ceph_lock); |
| 994 | out: | 996 | out: |
| 995 | iput(inode); | 997 | /* avoid calling iput_final() while holding mdsc->mutex or |
| 998 | * in mds dispatch threads */ | ||
| 999 | ceph_async_iput(inode); | ||
| 996 | return mds; | 1000 | return mds; |
| 997 | 1001 | ||
| 998 | random: | 1002 | random: |
| @@ -1302,7 +1306,9 @@ int ceph_iterate_session_caps(struct ceph_mds_session *session, | |||
| 1302 | spin_unlock(&session->s_cap_lock); | 1306 | spin_unlock(&session->s_cap_lock); |
| 1303 | 1307 | ||
| 1304 | if (last_inode) { | 1308 | if (last_inode) { |
| 1305 | iput(last_inode); | 1309 | /* avoid calling iput_final() while holding |
| 1310 | * s_mutex or in mds dispatch threads */ | ||
| 1311 | ceph_async_iput(last_inode); | ||
| 1306 | last_inode = NULL; | 1312 | last_inode = NULL; |
| 1307 | } | 1313 | } |
| 1308 | if (old_cap) { | 1314 | if (old_cap) { |
| @@ -1335,7 +1341,7 @@ out: | |||
| 1335 | session->s_cap_iterator = NULL; | 1341 | session->s_cap_iterator = NULL; |
| 1336 | spin_unlock(&session->s_cap_lock); | 1342 | spin_unlock(&session->s_cap_lock); |
| 1337 | 1343 | ||
| 1338 | iput(last_inode); | 1344 | ceph_async_iput(last_inode); |
| 1339 | if (old_cap) | 1345 | if (old_cap) |
| 1340 | ceph_put_cap(session->s_mdsc, old_cap); | 1346 | ceph_put_cap(session->s_mdsc, old_cap); |
| 1341 | 1347 | ||
| @@ -1471,7 +1477,8 @@ static void remove_session_caps(struct ceph_mds_session *session) | |||
| 1471 | spin_unlock(&session->s_cap_lock); | 1477 | spin_unlock(&session->s_cap_lock); |
| 1472 | 1478 | ||
| 1473 | inode = ceph_find_inode(sb, vino); | 1479 | inode = ceph_find_inode(sb, vino); |
| 1474 | iput(inode); | 1480 | /* avoid calling iput_final() while holding s_mutex */ |
| 1481 | ceph_async_iput(inode); | ||
| 1475 | 1482 | ||
| 1476 | spin_lock(&session->s_cap_lock); | 1483 | spin_lock(&session->s_cap_lock); |
| 1477 | } | 1484 | } |
| @@ -3912,8 +3919,9 @@ release: | |||
| 3912 | ceph_con_send(&session->s_con, msg); | 3919 | ceph_con_send(&session->s_con, msg); |
| 3913 | 3920 | ||
| 3914 | out: | 3921 | out: |
| 3915 | iput(inode); | ||
| 3916 | mutex_unlock(&session->s_mutex); | 3922 | mutex_unlock(&session->s_mutex); |
| 3923 | /* avoid calling iput_final() in mds dispatch threads */ | ||
| 3924 | ceph_async_iput(inode); | ||
| 3917 | return; | 3925 | return; |
| 3918 | 3926 | ||
| 3919 | bad: | 3927 | bad: |
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index c4522212872c..d629fc857450 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c | |||
| @@ -74,7 +74,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc, | |||
| 74 | le64_to_cpu(h->max_files)); | 74 | le64_to_cpu(h->max_files)); |
| 75 | spin_unlock(&ci->i_ceph_lock); | 75 | spin_unlock(&ci->i_ceph_lock); |
| 76 | 76 | ||
| 77 | iput(inode); | 77 | /* avoid calling iput_final() in dispatch thread */ |
| 78 | ceph_async_iput(inode); | ||
| 78 | } | 79 | } |
| 79 | 80 | ||
| 80 | static struct ceph_quotarealm_inode * | 81 | static struct ceph_quotarealm_inode * |
| @@ -235,7 +236,8 @@ restart: | |||
| 235 | 236 | ||
| 236 | ci = ceph_inode(in); | 237 | ci = ceph_inode(in); |
| 237 | has_quota = __ceph_has_any_quota(ci); | 238 | has_quota = __ceph_has_any_quota(ci); |
| 238 | iput(in); | 239 | /* avoid calling iput_final() while holding mdsc->snap_rwsem */ |
| 240 | ceph_async_iput(in); | ||
| 239 | 241 | ||
| 240 | next = realm->parent; | 242 | next = realm->parent; |
| 241 | if (has_quota || !next) | 243 | if (has_quota || !next) |
| @@ -372,7 +374,8 @@ restart: | |||
| 372 | pr_warn("Invalid quota check op (%d)\n", op); | 374 | pr_warn("Invalid quota check op (%d)\n", op); |
| 373 | exceeded = true; /* Just break the loop */ | 375 | exceeded = true; /* Just break the loop */ |
| 374 | } | 376 | } |
| 375 | iput(in); | 377 | /* avoid calling iput_final() while holding mdsc->snap_rwsem */ |
| 378 | ceph_async_iput(in); | ||
| 376 | 379 | ||
| 377 | next = realm->parent; | 380 | next = realm->parent; |
| 378 | if (exceeded || !next) | 381 | if (exceeded || !next) |
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b26e12cd8ec3..72c6c022f02b 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
| @@ -648,13 +648,15 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) | |||
| 648 | if (!inode) | 648 | if (!inode) |
| 649 | continue; | 649 | continue; |
| 650 | spin_unlock(&realm->inodes_with_caps_lock); | 650 | spin_unlock(&realm->inodes_with_caps_lock); |
| 651 | iput(lastinode); | 651 | /* avoid calling iput_final() while holding |
| 652 | * mdsc->snap_rwsem or in mds dispatch threads */ | ||
| 653 | ceph_async_iput(lastinode); | ||
| 652 | lastinode = inode; | 654 | lastinode = inode; |
| 653 | ceph_queue_cap_snap(ci); | 655 | ceph_queue_cap_snap(ci); |
| 654 | spin_lock(&realm->inodes_with_caps_lock); | 656 | spin_lock(&realm->inodes_with_caps_lock); |
| 655 | } | 657 | } |
| 656 | spin_unlock(&realm->inodes_with_caps_lock); | 658 | spin_unlock(&realm->inodes_with_caps_lock); |
| 657 | iput(lastinode); | 659 | ceph_async_iput(lastinode); |
| 658 | 660 | ||
| 659 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); | 661 | dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); |
| 660 | } | 662 | } |
| @@ -806,7 +808,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
| 806 | ihold(inode); | 808 | ihold(inode); |
| 807 | spin_unlock(&mdsc->snap_flush_lock); | 809 | spin_unlock(&mdsc->snap_flush_lock); |
| 808 | ceph_flush_snaps(ci, &session); | 810 | ceph_flush_snaps(ci, &session); |
| 809 | iput(inode); | 811 | /* avoid calling iput_final() while holding |
| 812 | * session->s_mutex or in mds dispatch threads */ | ||
| 813 | ceph_async_iput(inode); | ||
| 810 | spin_lock(&mdsc->snap_flush_lock); | 814 | spin_lock(&mdsc->snap_flush_lock); |
| 811 | } | 815 | } |
| 812 | spin_unlock(&mdsc->snap_flush_lock); | 816 | spin_unlock(&mdsc->snap_flush_lock); |
| @@ -950,12 +954,14 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
| 950 | ceph_get_snap_realm(mdsc, realm); | 954 | ceph_get_snap_realm(mdsc, realm); |
| 951 | ceph_put_snap_realm(mdsc, oldrealm); | 955 | ceph_put_snap_realm(mdsc, oldrealm); |
| 952 | 956 | ||
| 953 | iput(inode); | 957 | /* avoid calling iput_final() while holding |
| 958 | * mdsc->snap_rwsem or in mds dispatch threads */ | ||
| 959 | ceph_async_iput(inode); | ||
| 954 | continue; | 960 | continue; |
| 955 | 961 | ||
| 956 | skip_inode: | 962 | skip_inode: |
| 957 | spin_unlock(&ci->i_ceph_lock); | 963 | spin_unlock(&ci->i_ceph_lock); |
| 958 | iput(inode); | 964 | ceph_async_iput(inode); |
| 959 | } | 965 | } |
| 960 | 966 | ||
| 961 | /* we may have taken some of the old realm's children. */ | 967 | /* we may have taken some of the old realm's children. */ |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 01be7c1bc4c6..d57fa60dcd43 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
| @@ -672,18 +672,12 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
| 672 | * The number of concurrent works can be high but they don't need | 672 | * The number of concurrent works can be high but they don't need |
| 673 | * to be processed in parallel, limit concurrency. | 673 | * to be processed in parallel, limit concurrency. |
| 674 | */ | 674 | */ |
| 675 | fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1); | 675 | fsc->inode_wq = alloc_workqueue("ceph-inode", WQ_UNBOUND, 0); |
| 676 | if (!fsc->wb_wq) | 676 | if (!fsc->inode_wq) |
| 677 | goto fail_client; | 677 | goto fail_client; |
| 678 | fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1); | ||
| 679 | if (!fsc->pg_inv_wq) | ||
| 680 | goto fail_wb_wq; | ||
| 681 | fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1); | ||
| 682 | if (!fsc->trunc_wq) | ||
| 683 | goto fail_pg_inv_wq; | ||
| 684 | fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); | 678 | fsc->cap_wq = alloc_workqueue("ceph-cap", 0, 1); |
| 685 | if (!fsc->cap_wq) | 679 | if (!fsc->cap_wq) |
| 686 | goto fail_trunc_wq; | 680 | goto fail_inode_wq; |
| 687 | 681 | ||
| 688 | /* set up mempools */ | 682 | /* set up mempools */ |
| 689 | err = -ENOMEM; | 683 | err = -ENOMEM; |
| @@ -697,12 +691,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | |||
| 697 | 691 | ||
| 698 | fail_cap_wq: | 692 | fail_cap_wq: |
| 699 | destroy_workqueue(fsc->cap_wq); | 693 | destroy_workqueue(fsc->cap_wq); |
| 700 | fail_trunc_wq: | 694 | fail_inode_wq: |
| 701 | destroy_workqueue(fsc->trunc_wq); | 695 | destroy_workqueue(fsc->inode_wq); |
| 702 | fail_pg_inv_wq: | ||
| 703 | destroy_workqueue(fsc->pg_inv_wq); | ||
| 704 | fail_wb_wq: | ||
| 705 | destroy_workqueue(fsc->wb_wq); | ||
| 706 | fail_client: | 696 | fail_client: |
| 707 | ceph_destroy_client(fsc->client); | 697 | ceph_destroy_client(fsc->client); |
| 708 | fail: | 698 | fail: |
| @@ -715,9 +705,7 @@ fail: | |||
| 715 | 705 | ||
| 716 | static void flush_fs_workqueues(struct ceph_fs_client *fsc) | 706 | static void flush_fs_workqueues(struct ceph_fs_client *fsc) |
| 717 | { | 707 | { |
| 718 | flush_workqueue(fsc->wb_wq); | 708 | flush_workqueue(fsc->inode_wq); |
| 719 | flush_workqueue(fsc->pg_inv_wq); | ||
| 720 | flush_workqueue(fsc->trunc_wq); | ||
| 721 | flush_workqueue(fsc->cap_wq); | 709 | flush_workqueue(fsc->cap_wq); |
| 722 | } | 710 | } |
| 723 | 711 | ||
| @@ -725,9 +713,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) | |||
| 725 | { | 713 | { |
| 726 | dout("destroy_fs_client %p\n", fsc); | 714 | dout("destroy_fs_client %p\n", fsc); |
| 727 | 715 | ||
| 728 | destroy_workqueue(fsc->wb_wq); | 716 | destroy_workqueue(fsc->inode_wq); |
| 729 | destroy_workqueue(fsc->pg_inv_wq); | ||
| 730 | destroy_workqueue(fsc->trunc_wq); | ||
| 731 | destroy_workqueue(fsc->cap_wq); | 717 | destroy_workqueue(fsc->cap_wq); |
| 732 | 718 | ||
| 733 | mempool_destroy(fsc->wb_pagevec_pool); | 719 | mempool_destroy(fsc->wb_pagevec_pool); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6edab9a750f8..5f27e1f7f2d6 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -109,9 +109,7 @@ struct ceph_fs_client { | |||
| 109 | mempool_t *wb_pagevec_pool; | 109 | mempool_t *wb_pagevec_pool; |
| 110 | atomic_long_t writeback_count; | 110 | atomic_long_t writeback_count; |
| 111 | 111 | ||
| 112 | struct workqueue_struct *wb_wq; | 112 | struct workqueue_struct *inode_wq; |
| 113 | struct workqueue_struct *pg_inv_wq; | ||
| 114 | struct workqueue_struct *trunc_wq; | ||
| 115 | struct workqueue_struct *cap_wq; | 113 | struct workqueue_struct *cap_wq; |
| 116 | 114 | ||
| 117 | #ifdef CONFIG_DEBUG_FS | 115 | #ifdef CONFIG_DEBUG_FS |
| @@ -387,10 +385,8 @@ struct ceph_inode_info { | |||
| 387 | struct list_head i_snap_realm_item; | 385 | struct list_head i_snap_realm_item; |
| 388 | struct list_head i_snap_flush_item; | 386 | struct list_head i_snap_flush_item; |
| 389 | 387 | ||
| 390 | struct work_struct i_wb_work; /* writeback work */ | 388 | struct work_struct i_work; |
| 391 | struct work_struct i_pg_inv_work; /* page invalidation work */ | 389 | unsigned long i_work_mask; |
| 392 | |||
| 393 | struct work_struct i_vmtruncate_work; | ||
| 394 | 390 | ||
| 395 | #ifdef CONFIG_CEPH_FSCACHE | 391 | #ifdef CONFIG_CEPH_FSCACHE |
| 396 | struct fscache_cookie *fscache; | 392 | struct fscache_cookie *fscache; |
| @@ -513,6 +509,13 @@ static inline struct inode *ceph_find_inode(struct super_block *sb, | |||
| 513 | 509 | ||
| 514 | 510 | ||
| 515 | /* | 511 | /* |
| 512 | * Masks of ceph inode work. | ||
| 513 | */ | ||
| 514 | #define CEPH_I_WORK_WRITEBACK 0 /* writeback */ | ||
| 515 | #define CEPH_I_WORK_INVALIDATE_PAGES 1 /* invalidate pages */ | ||
| 516 | #define CEPH_I_WORK_VMTRUNCATE 2 /* vmtruncate */ | ||
| 517 | |||
| 518 | /* | ||
| 516 | * We set the ERROR_WRITE bit when we start seeing write errors on an inode | 519 | * We set the ERROR_WRITE bit when we start seeing write errors on an inode |
| 517 | * and then clear it when they start succeeding. Note that we do a lockless | 520 | * and then clear it when they start succeeding. Note that we do a lockless |
| 518 | * check first, and only take the lock if it looks like it needs to be changed. | 521 | * check first, and only take the lock if it looks like it needs to be changed. |
| @@ -896,9 +899,9 @@ extern int ceph_inode_holds_cap(struct inode *inode, int mask); | |||
| 896 | extern bool ceph_inode_set_size(struct inode *inode, loff_t size); | 899 | extern bool ceph_inode_set_size(struct inode *inode, loff_t size); |
| 897 | extern void __ceph_do_pending_vmtruncate(struct inode *inode); | 900 | extern void __ceph_do_pending_vmtruncate(struct inode *inode); |
| 898 | extern void ceph_queue_vmtruncate(struct inode *inode); | 901 | extern void ceph_queue_vmtruncate(struct inode *inode); |
| 899 | |||
| 900 | extern void ceph_queue_invalidate(struct inode *inode); | 902 | extern void ceph_queue_invalidate(struct inode *inode); |
| 901 | extern void ceph_queue_writeback(struct inode *inode); | 903 | extern void ceph_queue_writeback(struct inode *inode); |
| 904 | extern void ceph_async_iput(struct inode *inode); | ||
| 902 | 905 | ||
| 903 | extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, | 906 | extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page, |
| 904 | int mask, bool force); | 907 | int mask, bool force); |
