diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-06-18 14:22:56 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 20:32:03 -0400 |
commit | b27b7cbcf12a1bfff1ed68a73ddd7d11edc20daf (patch) | |
tree | a25a8ca272e7f0ef01987db3b3795a49e0ccb51f /fs/ocfs2/aops.c | |
parent | 0d172baa5586071ae0ae0c07356a378fdbedecdb (diff) |
ocfs2: support writing of unwritten extents
Update the write code to detect when the user is asking to write to an
unwritten extent. As when writing to a hole, we must zero the region between
the write and the cluster boundaries. Most of the existing cluster zeroing
logic can be re-used with some additional checks for the unwritten flag on
extent records.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r-- | fs/ocfs2/aops.c | 94 |
1 files changed, 74 insertions, 20 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 077583b50391..8af923316d22 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -782,8 +782,14 @@ struct ocfs2_write_cluster_desc { | |||
782 | * filled. | 782 | * filled. |
783 | */ | 783 | */ |
784 | unsigned c_new; | 784 | unsigned c_new; |
785 | unsigned c_unwritten; | ||
785 | }; | 786 | }; |
786 | 787 | ||
788 | static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) | ||
789 | { | ||
790 | return d->c_new || d->c_unwritten; | ||
791 | } | ||
792 | |||
787 | struct ocfs2_write_ctxt { | 793 | struct ocfs2_write_ctxt { |
788 | /* Logical cluster position / len of write */ | 794 | /* Logical cluster position / len of write */ |
789 | u32 w_cpos; | 795 | u32 w_cpos; |
@@ -829,6 +835,8 @@ struct ocfs2_write_ctxt { | |||
829 | handle_t *w_handle; | 835 | handle_t *w_handle; |
830 | 836 | ||
831 | struct buffer_head *w_di_bh; | 837 | struct buffer_head *w_di_bh; |
838 | |||
839 | struct ocfs2_cached_dealloc_ctxt w_dealloc; | ||
832 | }; | 840 | }; |
833 | 841 | ||
834 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) | 842 | static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) |
@@ -868,6 +876,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, | |||
868 | else | 876 | else |
869 | wc->w_large_pages = 0; | 877 | wc->w_large_pages = 0; |
870 | 878 | ||
879 | ocfs2_init_dealloc_ctxt(&wc->w_dealloc); | ||
880 | |||
871 | *wcp = wc; | 881 | *wcp = wc; |
872 | 882 | ||
873 | return 0; | 883 | return 0; |
@@ -1103,16 +1113,19 @@ out: | |||
1103 | * Prepare a single cluster for write one cluster into the file. | 1113 | * Prepare a single cluster for write one cluster into the file. |
1104 | */ | 1114 | */ |
1105 | static int ocfs2_write_cluster(struct address_space *mapping, | 1115 | static int ocfs2_write_cluster(struct address_space *mapping, |
1106 | u32 phys, struct ocfs2_alloc_context *data_ac, | 1116 | u32 phys, unsigned int unwritten, |
1117 | struct ocfs2_alloc_context *data_ac, | ||
1107 | struct ocfs2_alloc_context *meta_ac, | 1118 | struct ocfs2_alloc_context *meta_ac, |
1108 | struct ocfs2_write_ctxt *wc, u32 cpos, | 1119 | struct ocfs2_write_ctxt *wc, u32 cpos, |
1109 | loff_t user_pos, unsigned user_len) | 1120 | loff_t user_pos, unsigned user_len) |
1110 | { | 1121 | { |
1111 | int ret, i, new; | 1122 | int ret, i, new, should_zero = 0; |
1112 | u64 v_blkno, p_blkno; | 1123 | u64 v_blkno, p_blkno; |
1113 | struct inode *inode = mapping->host; | 1124 | struct inode *inode = mapping->host; |
1114 | 1125 | ||
1115 | new = phys == 0 ? 1 : 0; | 1126 | new = phys == 0 ? 1 : 0; |
1127 | if (new || unwritten) | ||
1128 | should_zero = 1; | ||
1116 | 1129 | ||
1117 | if (new) { | 1130 | if (new) { |
1118 | u32 tmp_pos; | 1131 | u32 tmp_pos; |
@@ -1142,11 +1155,20 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1142 | mlog_errno(ret); | 1155 | mlog_errno(ret); |
1143 | goto out; | 1156 | goto out; |
1144 | } | 1157 | } |
1158 | } else if (unwritten) { | ||
1159 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | ||
1160 | wc->w_handle, cpos, 1, phys, | ||
1161 | meta_ac, &wc->w_dealloc); | ||
1162 | if (ret < 0) { | ||
1163 | mlog_errno(ret); | ||
1164 | goto out; | ||
1165 | } | ||
1166 | } | ||
1145 | 1167 | ||
1168 | if (should_zero) | ||
1146 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); | 1169 | v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); |
1147 | } else { | 1170 | else |
1148 | v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; | 1171 | v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; |
1149 | } | ||
1150 | 1172 | ||
1151 | /* | 1173 | /* |
1152 | * The only reason this should fail is due to an inability to | 1174 | * The only reason this should fail is due to an inability to |
@@ -1169,7 +1191,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
1169 | 1191 | ||
1170 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, | 1192 | tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, |
1171 | wc->w_pages[i], cpos, | 1193 | wc->w_pages[i], cpos, |
1172 | user_pos, user_len, new); | 1194 | user_pos, user_len, |
1195 | should_zero); | ||
1173 | if (tmpret) { | 1196 | if (tmpret) { |
1174 | mlog_errno(tmpret); | 1197 | mlog_errno(tmpret); |
1175 | if (ret == 0) | 1198 | if (ret == 0) |
@@ -1200,8 +1223,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, | |||
1200 | for (i = 0; i < wc->w_clen; i++) { | 1223 | for (i = 0; i < wc->w_clen; i++) { |
1201 | desc = &wc->w_desc[i]; | 1224 | desc = &wc->w_desc[i]; |
1202 | 1225 | ||
1203 | ret = ocfs2_write_cluster(mapping, desc->c_phys, data_ac, | 1226 | ret = ocfs2_write_cluster(mapping, desc->c_phys, |
1204 | meta_ac, wc, desc->c_cpos, pos, len); | 1227 | desc->c_unwritten, data_ac, meta_ac, |
1228 | wc, desc->c_cpos, pos, len); | ||
1205 | if (ret) { | 1229 | if (ret) { |
1206 | mlog_errno(ret); | 1230 | mlog_errno(ret); |
1207 | goto out; | 1231 | goto out; |
@@ -1242,19 +1266,19 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | |||
1242 | if (wc->w_large_pages) { | 1266 | if (wc->w_large_pages) { |
1243 | /* | 1267 | /* |
1244 | * We only care about the 1st and last cluster within | 1268 | * We only care about the 1st and last cluster within |
1245 | * our range and whether they are holes or not. Either | 1269 | * our range and whether they should be zero'd or not. Either |
1246 | * value may be extended out to the start/end of a | 1270 | * value may be extended out to the start/end of a |
1247 | * newly allocated cluster. | 1271 | * newly allocated cluster. |
1248 | */ | 1272 | */ |
1249 | desc = &wc->w_desc[0]; | 1273 | desc = &wc->w_desc[0]; |
1250 | if (desc->c_new) | 1274 | if (ocfs2_should_zero_cluster(desc)) |
1251 | ocfs2_figure_cluster_boundaries(osb, | 1275 | ocfs2_figure_cluster_boundaries(osb, |
1252 | desc->c_cpos, | 1276 | desc->c_cpos, |
1253 | &wc->w_target_from, | 1277 | &wc->w_target_from, |
1254 | NULL); | 1278 | NULL); |
1255 | 1279 | ||
1256 | desc = &wc->w_desc[wc->w_clen - 1]; | 1280 | desc = &wc->w_desc[wc->w_clen - 1]; |
1257 | if (desc->c_new) | 1281 | if (ocfs2_should_zero_cluster(desc)) |
1258 | ocfs2_figure_cluster_boundaries(osb, | 1282 | ocfs2_figure_cluster_boundaries(osb, |
1259 | desc->c_cpos, | 1283 | desc->c_cpos, |
1260 | NULL, | 1284 | NULL, |
@@ -1268,28 +1292,52 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, | |||
1268 | /* | 1292 | /* |
1269 | * Populate each single-cluster write descriptor in the write context | 1293 | * Populate each single-cluster write descriptor in the write context |
1270 | * with information about the i/o to be done. | 1294 | * with information about the i/o to be done. |
1295 | * | ||
1296 | * Returns the number of clusters that will have to be allocated, as | ||
1297 | * well as a worst case estimate of the number of extent records that | ||
1298 | * would have to be created during a write to an unwritten region. | ||
1271 | */ | 1299 | */ |
1272 | static int ocfs2_populate_write_desc(struct inode *inode, | 1300 | static int ocfs2_populate_write_desc(struct inode *inode, |
1273 | struct ocfs2_write_ctxt *wc, | 1301 | struct ocfs2_write_ctxt *wc, |
1274 | unsigned int *clusters_to_alloc) | 1302 | unsigned int *clusters_to_alloc, |
1303 | unsigned int *extents_to_split) | ||
1275 | { | 1304 | { |
1276 | int ret; | 1305 | int ret; |
1277 | struct ocfs2_write_cluster_desc *desc; | 1306 | struct ocfs2_write_cluster_desc *desc; |
1278 | unsigned int num_clusters = 0; | 1307 | unsigned int num_clusters = 0; |
1308 | unsigned int ext_flags = 0; | ||
1279 | u32 phys = 0; | 1309 | u32 phys = 0; |
1280 | int i; | 1310 | int i; |
1281 | 1311 | ||
1312 | *clusters_to_alloc = 0; | ||
1313 | *extents_to_split = 0; | ||
1314 | |||
1282 | for (i = 0; i < wc->w_clen; i++) { | 1315 | for (i = 0; i < wc->w_clen; i++) { |
1283 | desc = &wc->w_desc[i]; | 1316 | desc = &wc->w_desc[i]; |
1284 | desc->c_cpos = wc->w_cpos + i; | 1317 | desc->c_cpos = wc->w_cpos + i; |
1285 | 1318 | ||
1286 | if (num_clusters == 0) { | 1319 | if (num_clusters == 0) { |
1320 | /* | ||
1321 | * Need to look up the next extent record. | ||
1322 | */ | ||
1287 | ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, | 1323 | ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, |
1288 | &num_clusters, NULL); | 1324 | &num_clusters, &ext_flags); |
1289 | if (ret) { | 1325 | if (ret) { |
1290 | mlog_errno(ret); | 1326 | mlog_errno(ret); |
1291 | goto out; | 1327 | goto out; |
1292 | } | 1328 | } |
1329 | |||
1330 | /* | ||
1331 | * Assume worst case - that we're writing in | ||
1332 | * the middle of the extent. | ||
1333 | * | ||
1334 | * We can assume that the write proceeds from | ||
1335 | * left to right, in which case the extent | ||
1336 | * insert code is smart enough to coalesce the | ||
1337 | * next splits into the previous records created. | ||
1338 | */ | ||
1339 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
1340 | *extents_to_split = *extents_to_split + 2; | ||
1293 | } else if (phys) { | 1341 | } else if (phys) { |
1294 | /* | 1342 | /* |
1295 | * Only increment phys if it doesn't describe | 1343 | * Only increment phys if it doesn't describe |
@@ -1303,6 +1351,8 @@ static int ocfs2_populate_write_desc(struct inode *inode, | |||
1303 | desc->c_new = 1; | 1351 | desc->c_new = 1; |
1304 | *clusters_to_alloc = *clusters_to_alloc + 1; | 1352 | *clusters_to_alloc = *clusters_to_alloc + 1; |
1305 | } | 1353 | } |
1354 | if (ext_flags & OCFS2_EXT_UNWRITTEN) | ||
1355 | desc->c_unwritten = 1; | ||
1306 | 1356 | ||
1307 | num_clusters--; | 1357 | num_clusters--; |
1308 | } | 1358 | } |
@@ -1318,7 +1368,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1318 | struct buffer_head *di_bh, struct page *mmap_page) | 1368 | struct buffer_head *di_bh, struct page *mmap_page) |
1319 | { | 1369 | { |
1320 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; | 1370 | int ret, credits = OCFS2_INODE_UPDATE_CREDITS; |
1321 | unsigned int clusters_to_alloc = 0; | 1371 | unsigned int clusters_to_alloc, extents_to_split; |
1322 | struct ocfs2_write_ctxt *wc; | 1372 | struct ocfs2_write_ctxt *wc; |
1323 | struct inode *inode = mapping->host; | 1373 | struct inode *inode = mapping->host; |
1324 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1374 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
@@ -1333,7 +1383,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1333 | return ret; | 1383 | return ret; |
1334 | } | 1384 | } |
1335 | 1385 | ||
1336 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc); | 1386 | ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, |
1387 | &extents_to_split); | ||
1337 | if (ret) { | 1388 | if (ret) { |
1338 | mlog_errno(ret); | 1389 | mlog_errno(ret); |
1339 | goto out; | 1390 | goto out; |
@@ -1347,14 +1398,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1347 | * write out. An allocation requires that we write the entire | 1398 | * write out. An allocation requires that we write the entire |
1348 | * cluster range. | 1399 | * cluster range. |
1349 | */ | 1400 | */ |
1350 | if (clusters_to_alloc > 0) { | 1401 | if (clusters_to_alloc || extents_to_split) { |
1351 | /* | 1402 | /* |
1352 | * XXX: We are stretching the limits of | 1403 | * XXX: We are stretching the limits of |
1353 | * ocfs2_lock_allocators(). It greately over-estimates | 1404 | * ocfs2_lock_allocators(). It greatly over-estimates |
1354 | * the work to be done. | 1405 | * the work to be done. |
1355 | */ | 1406 | */ |
1356 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | 1407 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, |
1357 | &data_ac, &meta_ac); | 1408 | extents_to_split, &data_ac, &meta_ac); |
1358 | if (ret) { | 1409 | if (ret) { |
1359 | mlog_errno(ret); | 1410 | mlog_errno(ret); |
1360 | goto out; | 1411 | goto out; |
@@ -1365,7 +1416,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1365 | 1416 | ||
1366 | } | 1417 | } |
1367 | 1418 | ||
1368 | ocfs2_set_target_boundaries(osb, wc, pos, len, clusters_to_alloc); | 1419 | ocfs2_set_target_boundaries(osb, wc, pos, len, |
1420 | clusters_to_alloc + extents_to_split); | ||
1369 | 1421 | ||
1370 | handle = ocfs2_start_trans(osb, credits); | 1422 | handle = ocfs2_start_trans(osb, credits); |
1371 | if (IS_ERR(handle)) { | 1423 | if (IS_ERR(handle)) { |
@@ -1393,7 +1445,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1393 | * extent. | 1445 | * extent. |
1394 | */ | 1446 | */ |
1395 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, | 1447 | ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, |
1396 | clusters_to_alloc, mmap_page); | 1448 | clusters_to_alloc + extents_to_split, |
1449 | mmap_page); | ||
1397 | if (ret) { | 1450 | if (ret) { |
1398 | mlog_errno(ret); | 1451 | mlog_errno(ret); |
1399 | goto out_commit; | 1452 | goto out_commit; |
@@ -1538,11 +1591,12 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
1538 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 1591 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
1539 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 1592 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
1540 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1593 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
1541 | |||
1542 | ocfs2_journal_dirty(handle, wc->w_di_bh); | 1594 | ocfs2_journal_dirty(handle, wc->w_di_bh); |
1543 | 1595 | ||
1544 | ocfs2_commit_trans(osb, handle); | 1596 | ocfs2_commit_trans(osb, handle); |
1545 | 1597 | ||
1598 | ocfs2_run_deallocs(osb, &wc->w_dealloc); | ||
1599 | |||
1546 | ocfs2_free_write_ctxt(wc); | 1600 | ocfs2_free_write_ctxt(wc); |
1547 | 1601 | ||
1548 | return copied; | 1602 | return copied; |