diff options
author | Yan, Zheng <zyan@redhat.com> | 2015-05-26 21:59:48 -0400 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2015-06-25 04:49:30 -0400 |
commit | 89b52fe14de4d703ba837a7418bb4cd286dcc87f (patch) | |
tree | 718665f96b7a185bc2748ea620a63687d425d3df /fs/ceph | |
parent | 41445999aeec1f0fdf196ab55b2c770473b2ea01 (diff) |
ceph: fix flushing caps
Currently, ceph_fsync() only flushes dirty caps and waits for them to be
flushed. It doesn't wait for caps that are already being flushed.
This patch makes ceph_fsync() wait for pending flushing caps too.
Besides, this patch also makes caps_are_flushed() properly handle
tid wrapping.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/caps.c | 49 |
1 files changed, 25 insertions, 24 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a80a899e5c41..e9b03b51b874 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1097,8 +1097,7 @@ void ceph_queue_caps_release(struct inode *inode) | |||
1097 | * caller should hold snap_rwsem (read), s_mutex. | 1097 | * caller should hold snap_rwsem (read), s_mutex. |
1098 | */ | 1098 | */ |
1099 | static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | 1099 | static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, |
1100 | int op, int used, int want, int retain, int flushing, | 1100 | int op, int used, int want, int retain, int flushing) |
1101 | unsigned *pflush_tid) | ||
1102 | __releases(cap->ci->i_ceph_lock) | 1101 | __releases(cap->ci->i_ceph_lock) |
1103 | { | 1102 | { |
1104 | struct ceph_inode_info *ci = cap->ci; | 1103 | struct ceph_inode_info *ci = cap->ci; |
@@ -1170,8 +1169,6 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1170 | * first ack clean Ax. | 1169 | * first ack clean Ax. |
1171 | */ | 1170 | */ |
1172 | flush_tid = ++ci->i_cap_flush_last_tid; | 1171 | flush_tid = ++ci->i_cap_flush_last_tid; |
1173 | if (pflush_tid) | ||
1174 | *pflush_tid = flush_tid; | ||
1175 | dout(" cap_flush_tid %d\n", (int)flush_tid); | 1172 | dout(" cap_flush_tid %d\n", (int)flush_tid); |
1176 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1173 | for (i = 0; i < CEPH_CAP_BITS; i++) |
1177 | if (flushing & (1 << i)) | 1174 | if (flushing & (1 << i)) |
@@ -1724,7 +1721,7 @@ ack: | |||
1724 | 1721 | ||
1725 | /* __send_cap drops i_ceph_lock */ | 1722 | /* __send_cap drops i_ceph_lock */ |
1726 | delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, | 1723 | delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, |
1727 | want, retain, flushing, NULL); | 1724 | want, retain, flushing); |
1728 | goto retry; /* retake i_ceph_lock and restart our cap scan. */ | 1725 | goto retry; /* retake i_ceph_lock and restart our cap scan. */ |
1729 | } | 1726 | } |
1730 | 1727 | ||
@@ -1753,12 +1750,12 @@ ack: | |||
1753 | /* | 1750 | /* |
1754 | * Try to flush dirty caps back to the auth mds. | 1751 | * Try to flush dirty caps back to the auth mds. |
1755 | */ | 1752 | */ |
1756 | static int try_flush_caps(struct inode *inode, unsigned *flush_tid) | 1753 | static int try_flush_caps(struct inode *inode, u16 flush_tid[]) |
1757 | { | 1754 | { |
1758 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 1755 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1759 | struct ceph_inode_info *ci = ceph_inode(inode); | 1756 | struct ceph_inode_info *ci = ceph_inode(inode); |
1760 | int flushing = 0; | ||
1761 | struct ceph_mds_session *session = NULL; | 1757 | struct ceph_mds_session *session = NULL; |
1758 | int flushing = 0; | ||
1762 | 1759 | ||
1763 | retry: | 1760 | retry: |
1764 | spin_lock(&ci->i_ceph_lock); | 1761 | spin_lock(&ci->i_ceph_lock); |
@@ -1787,17 +1784,19 @@ retry: | |||
1787 | 1784 | ||
1788 | /* __send_cap drops i_ceph_lock */ | 1785 | /* __send_cap drops i_ceph_lock */ |
1789 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, | 1786 | delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, |
1790 | cap->issued | cap->implemented, flushing, | 1787 | cap->issued | cap->implemented, flushing); |
1791 | flush_tid); | ||
1792 | if (!delayed) | ||
1793 | goto out_unlocked; | ||
1794 | 1788 | ||
1795 | spin_lock(&ci->i_ceph_lock); | 1789 | spin_lock(&ci->i_ceph_lock); |
1796 | __cap_delay_requeue(mdsc, ci); | 1790 | if (delayed) |
1791 | __cap_delay_requeue(mdsc, ci); | ||
1797 | } | 1792 | } |
1793 | |||
1794 | flushing = ci->i_flushing_caps; | ||
1795 | if (flushing) | ||
1796 | memcpy(flush_tid, ci->i_cap_flush_tid, | ||
1797 | sizeof(ci->i_cap_flush_tid)); | ||
1798 | out: | 1798 | out: |
1799 | spin_unlock(&ci->i_ceph_lock); | 1799 | spin_unlock(&ci->i_ceph_lock); |
1800 | out_unlocked: | ||
1801 | if (session) | 1800 | if (session) |
1802 | mutex_unlock(&session->s_mutex); | 1801 | mutex_unlock(&session->s_mutex); |
1803 | return flushing; | 1802 | return flushing; |
@@ -1806,19 +1805,22 @@ out_unlocked: | |||
1806 | /* | 1805 | /* |
1807 | * Return true if we've flushed caps through the given flush_tid. | 1806 | * Return true if we've flushed caps through the given flush_tid. |
1808 | */ | 1807 | */ |
1809 | static int caps_are_flushed(struct inode *inode, unsigned tid) | 1808 | static int caps_are_flushed(struct inode *inode, u16 flush_tid[]) |
1810 | { | 1809 | { |
1811 | struct ceph_inode_info *ci = ceph_inode(inode); | 1810 | struct ceph_inode_info *ci = ceph_inode(inode); |
1812 | int i, ret = 1; | 1811 | int i, ret = 1; |
1813 | 1812 | ||
1814 | spin_lock(&ci->i_ceph_lock); | 1813 | spin_lock(&ci->i_ceph_lock); |
1815 | for (i = 0; i < CEPH_CAP_BITS; i++) | 1814 | for (i = 0; i < CEPH_CAP_BITS; i++) { |
1816 | if ((ci->i_flushing_caps & (1 << i)) && | 1815 | if (!(ci->i_flushing_caps & (1 << i))) |
1817 | ci->i_cap_flush_tid[i] <= tid) { | 1816 | continue; |
1817 | // tid only has 16 bits. we need to handle wrapping | ||
1818 | if ((s16)(ci->i_cap_flush_tid[i] - flush_tid[i]) <= 0) { | ||
1818 | /* still flushing this bit */ | 1819 | /* still flushing this bit */ |
1819 | ret = 0; | 1820 | ret = 0; |
1820 | break; | 1821 | break; |
1821 | } | 1822 | } |
1823 | } | ||
1822 | spin_unlock(&ci->i_ceph_lock); | 1824 | spin_unlock(&ci->i_ceph_lock); |
1823 | return ret; | 1825 | return ret; |
1824 | } | 1826 | } |
@@ -1871,7 +1873,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
1871 | { | 1873 | { |
1872 | struct inode *inode = file->f_mapping->host; | 1874 | struct inode *inode = file->f_mapping->host; |
1873 | struct ceph_inode_info *ci = ceph_inode(inode); | 1875 | struct ceph_inode_info *ci = ceph_inode(inode); |
1874 | unsigned flush_tid; | 1876 | u16 flush_tid[CEPH_CAP_BITS]; |
1875 | int ret; | 1877 | int ret; |
1876 | int dirty; | 1878 | int dirty; |
1877 | 1879 | ||
@@ -1883,7 +1885,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
1883 | return ret; | 1885 | return ret; |
1884 | mutex_lock(&inode->i_mutex); | 1886 | mutex_lock(&inode->i_mutex); |
1885 | 1887 | ||
1886 | dirty = try_flush_caps(inode, &flush_tid); | 1888 | dirty = try_flush_caps(inode, flush_tid); |
1887 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); | 1889 | dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); |
1888 | 1890 | ||
1889 | /* | 1891 | /* |
@@ -1892,7 +1894,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
1892 | * wait for that) | 1894 | * wait for that) |
1893 | */ | 1895 | */ |
1894 | if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { | 1896 | if (!datasync && (dirty & ~CEPH_CAP_ANY_FILE_WR)) { |
1895 | dout("fsync waiting for flush_tid %u\n", flush_tid); | ||
1896 | ret = wait_event_interruptible(ci->i_cap_wq, | 1897 | ret = wait_event_interruptible(ci->i_cap_wq, |
1897 | caps_are_flushed(inode, flush_tid)); | 1898 | caps_are_flushed(inode, flush_tid)); |
1898 | } | 1899 | } |
@@ -1911,14 +1912,14 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) | |||
1911 | int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | 1912 | int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) |
1912 | { | 1913 | { |
1913 | struct ceph_inode_info *ci = ceph_inode(inode); | 1914 | struct ceph_inode_info *ci = ceph_inode(inode); |
1914 | unsigned flush_tid; | 1915 | u16 flush_tid[CEPH_CAP_BITS]; |
1915 | int err = 0; | 1916 | int err = 0; |
1916 | int dirty; | 1917 | int dirty; |
1917 | int wait = wbc->sync_mode == WB_SYNC_ALL; | 1918 | int wait = wbc->sync_mode == WB_SYNC_ALL; |
1918 | 1919 | ||
1919 | dout("write_inode %p wait=%d\n", inode, wait); | 1920 | dout("write_inode %p wait=%d\n", inode, wait); |
1920 | if (wait) { | 1921 | if (wait) { |
1921 | dirty = try_flush_caps(inode, &flush_tid); | 1922 | dirty = try_flush_caps(inode, flush_tid); |
1922 | if (dirty) | 1923 | if (dirty) |
1923 | err = wait_event_interruptible(ci->i_cap_wq, | 1924 | err = wait_event_interruptible(ci->i_cap_wq, |
1924 | caps_are_flushed(inode, flush_tid)); | 1925 | caps_are_flushed(inode, flush_tid)); |
@@ -1988,7 +1989,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, | |||
1988 | __ceph_caps_used(ci), | 1989 | __ceph_caps_used(ci), |
1989 | __ceph_caps_wanted(ci), | 1990 | __ceph_caps_wanted(ci), |
1990 | cap->issued | cap->implemented, | 1991 | cap->issued | cap->implemented, |
1991 | ci->i_flushing_caps, NULL); | 1992 | ci->i_flushing_caps); |
1992 | if (delayed) { | 1993 | if (delayed) { |
1993 | spin_lock(&ci->i_ceph_lock); | 1994 | spin_lock(&ci->i_ceph_lock); |
1994 | __cap_delay_requeue(mdsc, ci); | 1995 | __cap_delay_requeue(mdsc, ci); |
@@ -2027,7 +2028,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc, | |||
2027 | __ceph_caps_used(ci), | 2028 | __ceph_caps_used(ci), |
2028 | __ceph_caps_wanted(ci), | 2029 | __ceph_caps_wanted(ci), |
2029 | cap->issued | cap->implemented, | 2030 | cap->issued | cap->implemented, |
2030 | ci->i_flushing_caps, NULL); | 2031 | ci->i_flushing_caps); |
2031 | if (delayed) { | 2032 | if (delayed) { |
2032 | spin_lock(&ci->i_ceph_lock); | 2033 | spin_lock(&ci->i_ceph_lock); |
2033 | __cap_delay_requeue(mdsc, ci); | 2034 | __cap_delay_requeue(mdsc, ci); |