diff options
author | Trond Myklebust <trond.myklebust@primarydata.com> | 2014-01-13 13:34:36 -0500 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2014-01-13 13:34:36 -0500 |
commit | 71244d9bdf185e5bba1473254241f9f65d4dd0d8 (patch) | |
tree | 054ddd9a95c3ada5243dead8da568471fc7d5137 | |
parent | d8c951c313ed1d7144b55c0d56f7c53220044dda (diff) |
NFSv4.1: Fix a race in nfs4_write_inode
nfs4_write_inode() must not be allowed to exit until the layoutcommit
is done. That means that both NFS_INO_LAYOUTCOMMIT and
NFS_INO_LAYOUTCOMMITTING have to be cleared.
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
-rw-r--r-- | fs/nfs/nfs4super.c | 14 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 67 |
2 files changed, 38 insertions, 43 deletions
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 65ab0a0ca1c4..808f29574412 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
@@ -77,17 +77,9 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
77 | { | 77 | { |
78 | int ret = nfs_write_inode(inode, wbc); | 78 | int ret = nfs_write_inode(inode, wbc); |
79 | 79 | ||
80 | if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { | 80 | if (ret == 0) |
81 | int status; | 81 | ret = pnfs_layoutcommit_inode(inode, |
82 | bool sync = true; | 82 | wbc->sync_mode == WB_SYNC_ALL); |
83 | |||
84 | if (wbc->sync_mode == WB_SYNC_NONE) | ||
85 | sync = false; | ||
86 | |||
87 | status = pnfs_layoutcommit_inode(inode, sync); | ||
88 | if (status < 0) | ||
89 | return status; | ||
90 | } | ||
91 | return ret; | 83 | return ret; |
92 | } | 84 | } |
93 | 85 | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d75d938d36cb..4755858e37a0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -1790,6 +1790,15 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) | |||
1790 | } | 1790 | } |
1791 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); | 1791 | EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); |
1792 | 1792 | ||
1793 | static void pnfs_clear_layoutcommitting(struct inode *inode) | ||
1794 | { | ||
1795 | unsigned long *bitlock = &NFS_I(inode)->flags; | ||
1796 | |||
1797 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | ||
1798 | smp_mb__after_clear_bit(); | ||
1799 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | ||
1800 | } | ||
1801 | |||
1793 | /* | 1802 | /* |
1794 | * There can be multiple RW segments. | 1803 | * There can be multiple RW segments. |
1795 | */ | 1804 | */ |
@@ -1807,7 +1816,6 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp) | |||
1807 | static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) | 1816 | static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *listp) |
1808 | { | 1817 | { |
1809 | struct pnfs_layout_segment *lseg, *tmp; | 1818 | struct pnfs_layout_segment *lseg, *tmp; |
1810 | unsigned long *bitlock = &NFS_I(inode)->flags; | ||
1811 | 1819 | ||
1812 | /* Matched by references in pnfs_set_layoutcommit */ | 1820 | /* Matched by references in pnfs_set_layoutcommit */ |
1813 | list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { | 1821 | list_for_each_entry_safe(lseg, tmp, listp, pls_lc_list) { |
@@ -1815,9 +1823,7 @@ static void pnfs_list_write_lseg_done(struct inode *inode, struct list_head *lis | |||
1815 | pnfs_put_lseg(lseg); | 1823 | pnfs_put_lseg(lseg); |
1816 | } | 1824 | } |
1817 | 1825 | ||
1818 | clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); | 1826 | pnfs_clear_layoutcommitting(inode); |
1819 | smp_mb__after_clear_bit(); | ||
1820 | wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING); | ||
1821 | } | 1827 | } |
1822 | 1828 | ||
1823 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) | 1829 | void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) |
@@ -1881,43 +1887,37 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) | |||
1881 | struct nfs4_layoutcommit_data *data; | 1887 | struct nfs4_layoutcommit_data *data; |
1882 | struct nfs_inode *nfsi = NFS_I(inode); | 1888 | struct nfs_inode *nfsi = NFS_I(inode); |
1883 | loff_t end_pos; | 1889 | loff_t end_pos; |
1884 | int status = 0; | 1890 | int status; |
1885 | 1891 | ||
1886 | dprintk("--> %s inode %lu\n", __func__, inode->i_ino); | 1892 | if (!pnfs_layoutcommit_outstanding(inode)) |
1887 | |||
1888 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
1889 | return 0; | 1893 | return 0; |
1890 | 1894 | ||
1891 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | 1895 | dprintk("--> %s inode %lu\n", __func__, inode->i_ino); |
1892 | data = kzalloc(sizeof(*data), GFP_NOFS); | ||
1893 | if (!data) { | ||
1894 | status = -ENOMEM; | ||
1895 | goto out; | ||
1896 | } | ||
1897 | |||
1898 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
1899 | goto out_free; | ||
1900 | 1896 | ||
1897 | status = -EAGAIN; | ||
1901 | if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { | 1898 | if (test_and_set_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags)) { |
1902 | if (!sync) { | 1899 | if (!sync) |
1903 | status = -EAGAIN; | 1900 | goto out; |
1904 | goto out_free; | 1901 | status = wait_on_bit_lock(&nfsi->flags, |
1905 | } | 1902 | NFS_INO_LAYOUTCOMMITTING, |
1906 | status = wait_on_bit_lock(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING, | 1903 | nfs_wait_bit_killable, |
1907 | nfs_wait_bit_killable, TASK_KILLABLE); | 1904 | TASK_KILLABLE); |
1908 | if (status) | 1905 | if (status) |
1909 | goto out_free; | 1906 | goto out; |
1910 | } | 1907 | } |
1911 | 1908 | ||
1912 | INIT_LIST_HEAD(&data->lseg_list); | 1909 | status = -ENOMEM; |
1910 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | ||
1911 | data = kzalloc(sizeof(*data), GFP_NOFS); | ||
1912 | if (!data) | ||
1913 | goto clear_layoutcommitting; | ||
1914 | |||
1915 | status = 0; | ||
1913 | spin_lock(&inode->i_lock); | 1916 | spin_lock(&inode->i_lock); |
1914 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | 1917 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) |
1915 | clear_bit(NFS_INO_LAYOUTCOMMITTING, &nfsi->flags); | 1918 | goto out_unlock; |
1916 | spin_unlock(&inode->i_lock); | ||
1917 | wake_up_bit(&nfsi->flags, NFS_INO_LAYOUTCOMMITTING); | ||
1918 | goto out_free; | ||
1919 | } | ||
1920 | 1919 | ||
1920 | INIT_LIST_HEAD(&data->lseg_list); | ||
1921 | pnfs_list_write_lseg(inode, &data->lseg_list); | 1921 | pnfs_list_write_lseg(inode, &data->lseg_list); |
1922 | 1922 | ||
1923 | end_pos = nfsi->layout->plh_lwb; | 1923 | end_pos = nfsi->layout->plh_lwb; |
@@ -1940,8 +1940,11 @@ out: | |||
1940 | mark_inode_dirty_sync(inode); | 1940 | mark_inode_dirty_sync(inode); |
1941 | dprintk("<-- %s status %d\n", __func__, status); | 1941 | dprintk("<-- %s status %d\n", __func__, status); |
1942 | return status; | 1942 | return status; |
1943 | out_free: | 1943 | out_unlock: |
1944 | spin_unlock(&inode->i_lock); | ||
1944 | kfree(data); | 1945 | kfree(data); |
1946 | clear_layoutcommitting: | ||
1947 | pnfs_clear_layoutcommitting(inode); | ||
1945 | goto out; | 1948 | goto out; |
1946 | } | 1949 | } |
1947 | 1950 | ||