aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/aops.c
diff options
context:
space:
mode:
authorRyan Ding <ryan.ding@oracle.com>2016-03-25 17:21:06 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-25 19:37:42 -0400
commit4506cfb6f8cad594ac73e0df2b2961ca10dbd25e (patch)
tree2dff95967b493d1629ad384ec593895564c64c03 /fs/ocfs2/aops.c
parent2de6a3c73180ef4071b45185756be51a6c767924 (diff)
ocfs2: record UNWRITTEN extents when populate write desc
To support direct io in ocfs2_write_begin_nolock & ocfs2_write_end_nolock, there is still one issue in the direct write procedure. phase 1: allocate the extent with the UNWRITTEN flag. phase 2: submit the direct data to disk and add zero pages to the page cache. phase 3: clear the UNWRITTEN flag once the data has been written to disk. When there are 2 direct writes, A (0~3KB) and B (4~7KB), writing to the same cluster 0~7KB (cluster size 8KB), write request A arrives at phase 2 first and zeroes the region (4~7KB). Before request A enters phase 3, request B arrives at phase 2 and zeroes the region (0~3KB). In effect, request B steps on request A. To resolve this issue, we should let request B know that this cluster is already being zeroed, to prevent it from stepping on the previous write request. This patch adds the function ocfs2_unwritten_check() to do this job. It records all clusters that are under direct write (they are recorded in the 'ip_unwritten_list' member of the inode info), and prevents a later direct write to the same cluster from doing the zero work again. Signed-off-by: Ryan Ding <ryan.ding@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <joseph.qi@huawei.com> Cc: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r--fs/ocfs2/aops.c104
1 files changed, 99 insertions, 5 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 7b268c357cf3..c29d06634fd6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1201,6 +1201,13 @@ next_bh:
1201 1201
1202#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) 1202#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
1203 1203
1204struct ocfs2_unwritten_extent {
1205 struct list_head ue_node;
1206 struct list_head ue_ip_node;
1207 u32 ue_cpos;
1208 u32 ue_phys;
1209};
1210
1204/* 1211/*
1205 * Describe the state of a single cluster to be written to. 1212 * Describe the state of a single cluster to be written to.
1206 */ 1213 */
@@ -1275,6 +1282,8 @@ struct ocfs2_write_ctxt {
1275 struct buffer_head *w_di_bh; 1282 struct buffer_head *w_di_bh;
1276 1283
1277 struct ocfs2_cached_dealloc_ctxt w_dealloc; 1284 struct ocfs2_cached_dealloc_ctxt w_dealloc;
1285
1286 struct list_head w_unwritten_list;
1278}; 1287};
1279 1288
1280void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) 1289void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
@@ -1313,8 +1322,25 @@ static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
1313 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); 1322 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
1314} 1323}
1315 1324
1316static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 1325static void ocfs2_free_unwritten_list(struct inode *inode,
1326 struct list_head *head)
1327{
1328 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1329 struct ocfs2_unwritten_extent *dz = NULL, *tmp = NULL;
1330
1331 list_for_each_entry_safe(dz, tmp, head, ue_node) {
1332 list_del(&dz->ue_node);
1333 spin_lock(&oi->ip_lock);
1334 list_del(&dz->ue_ip_node);
1335 spin_unlock(&oi->ip_lock);
1336 kfree(dz);
1337 }
1338}
1339
1340static void ocfs2_free_write_ctxt(struct inode *inode,
1341 struct ocfs2_write_ctxt *wc)
1317{ 1342{
1343 ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
1318 ocfs2_unlock_pages(wc); 1344 ocfs2_unlock_pages(wc);
1319 brelse(wc->w_di_bh); 1345 brelse(wc->w_di_bh);
1320 kfree(wc); 1346 kfree(wc);
@@ -1346,6 +1372,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
1346 wc->w_large_pages = 0; 1372 wc->w_large_pages = 0;
1347 1373
1348 ocfs2_init_dealloc_ctxt(&wc->w_dealloc); 1374 ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
1375 INIT_LIST_HEAD(&wc->w_unwritten_list);
1349 1376
1350 *wcp = wc; 1377 *wcp = wc;
1351 1378
@@ -1796,6 +1823,66 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1796} 1823}
1797 1824
1798/* 1825/*
1826 * Check if this extent is marked UNWRITTEN by direct io. If so, we need not
1827 * do the zero work, and should not clear UNWRITTEN since it will be cleared
1828 * by the direct io procedure.
1829 * If this is a new extent that was allocated by direct io, we should record
1830 * it in the ip_unwritten_list.
1831 */
1832static int ocfs2_unwritten_check(struct inode *inode,
1833 struct ocfs2_write_ctxt *wc,
1834 struct ocfs2_write_cluster_desc *desc)
1835{
1836 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1837 struct ocfs2_unwritten_extent *dz = NULL, *new = NULL;
1838 int ret = 0;
1839
1840 if (!desc->c_needs_zero)
1841 return 0;
1842
1843retry:
1844 spin_lock(&oi->ip_lock);
1845 /* No need to zero, no matter whether the write is buffered or direct:
1846 * whoever is zeroing the cluster is already doing so, and will clear
1847 * UNWRITTEN after all cluster io has finished. */
1848 list_for_each_entry(dz, &oi->ip_unwritten_list, ue_ip_node) {
1849 if (desc->c_cpos == dz->ue_cpos) {
1850 BUG_ON(desc->c_new);
1851 desc->c_needs_zero = 0;
1852 desc->c_clear_unwritten = 0;
1853 goto unlock;
1854 }
1855 }
1856
1857 if (wc->w_type != OCFS2_WRITE_DIRECT)
1858 goto unlock;
1859
1860 if (new == NULL) {
1861 spin_unlock(&oi->ip_lock);
1862 new = kmalloc(sizeof(struct ocfs2_unwritten_extent),
1863 GFP_NOFS);
1864 if (new == NULL) {
1865 ret = -ENOMEM;
1866 goto out;
1867 }
1868 goto retry;
1869 }
1870 /* This direct write will do the zeroing. */
1871 new->ue_cpos = desc->c_cpos;
1872 new->ue_phys = desc->c_phys;
1873 desc->c_clear_unwritten = 0;
1874 list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
1875 list_add_tail(&new->ue_node, &wc->w_unwritten_list);
1876 new = NULL;
1877unlock:
1878 spin_unlock(&oi->ip_lock);
1879out:
1880 if (new)
1881 kfree(new);
1882 return ret;
1883}
1884
1885/*
1799 * Populate each single-cluster write descriptor in the write context 1886 * Populate each single-cluster write descriptor in the write context
1800 * with information about the i/o to be done. 1887 * with information about the i/o to be done.
1801 * 1888 *
@@ -1879,6 +1966,12 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1879 desc->c_needs_zero = 1; 1966 desc->c_needs_zero = 1;
1880 } 1967 }
1881 1968
1969 ret = ocfs2_unwritten_check(inode, wc, desc);
1970 if (ret) {
1971 mlog_errno(ret);
1972 goto out;
1973 }
1974
1882 num_clusters--; 1975 num_clusters--;
1883 } 1976 }
1884 1977
@@ -2215,9 +2308,8 @@ try_again:
2215 * and non-sparse clusters we just extended. For non-sparse writes, 2308 * and non-sparse clusters we just extended. For non-sparse writes,
2216 * we know zeros will only be needed in the first and/or last cluster. 2309 * we know zeros will only be needed in the first and/or last cluster.
2217 */ 2310 */
2218 if (clusters_to_alloc || extents_to_split || 2311 if (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
2219 (wc->w_clen && (wc->w_desc[0].c_needs_zero || 2312 wc->w_desc[wc->w_clen - 1].c_needs_zero))
2220 wc->w_desc[wc->w_clen - 1].c_needs_zero)))
2221 cluster_of_pages = 1; 2313 cluster_of_pages = 1;
2222 else 2314 else
2223 cluster_of_pages = 0; 2315 cluster_of_pages = 0;
@@ -2296,7 +2388,7 @@ out_commit:
2296 ocfs2_commit_trans(osb, handle); 2388 ocfs2_commit_trans(osb, handle);
2297 2389
2298out: 2390out:
2299 ocfs2_free_write_ctxt(wc); 2391 ocfs2_free_write_ctxt(inode, wc);
2300 2392
2301 if (data_ac) { 2393 if (data_ac) {
2302 ocfs2_free_alloc_context(data_ac); 2394 ocfs2_free_alloc_context(data_ac);
@@ -2406,6 +2498,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
2406 handle_t *handle = wc->w_handle; 2498 handle_t *handle = wc->w_handle;
2407 struct page *tmppage; 2499 struct page *tmppage;
2408 2500
2501 BUG_ON(!list_empty(&wc->w_unwritten_list));
2502
2409 if (handle) { 2503 if (handle) {
2410 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), 2504 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2411 wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); 2505 wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE);