about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mark Fasheh <mark.fasheh@oracle.com> 2007-06-18 14:22:56 -0400
committer Mark Fasheh <mark.fasheh@oracle.com> 2007-07-10 20:32:03 -0400
commit b27b7cbcf12a1bfff1ed68a73ddd7d11edc20daf (patch)
tree a25a8ca272e7f0ef01987db3b3795a49e0ccb51f
parent 0d172baa5586071ae0ae0c07356a378fdbedecdb (diff)
ocfs2: support writing of unwritten extents
Update the write code to detect when the user is asking to write to an unwritten extent. Like writing to a hole, we must zero the region between the write and the cluster boundaries. Most of the existing cluster zeroing logic can be re-used with some additional checks for the unwritten flag on extent records.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- fs/ocfs2/aops.c 94
-rw-r--r-- fs/ocfs2/file.c 14
-rw-r--r-- fs/ocfs2/file.h 2
3 files changed, 84 insertions, 26 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 077583b50391..8af923316d22 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -782,8 +782,14 @@ struct ocfs2_write_cluster_desc {
782 * filled. 782 * filled.
783 */ 783 */
784 unsigned c_new; 784 unsigned c_new;
785 unsigned c_unwritten;
785}; 786};
786 787
788static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
789{
790 return d->c_new || d->c_unwritten;
791}
792
787struct ocfs2_write_ctxt { 793struct ocfs2_write_ctxt {
788 /* Logical cluster position / len of write */ 794 /* Logical cluster position / len of write */
789 u32 w_cpos; 795 u32 w_cpos;
@@ -829,6 +835,8 @@ struct ocfs2_write_ctxt {
829 handle_t *w_handle; 835 handle_t *w_handle;
830 836
831 struct buffer_head *w_di_bh; 837 struct buffer_head *w_di_bh;
838
839 struct ocfs2_cached_dealloc_ctxt w_dealloc;
832}; 840};
833 841
834static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 842static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
@@ -868,6 +876,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
868 else 876 else
869 wc->w_large_pages = 0; 877 wc->w_large_pages = 0;
870 878
879 ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
880
871 *wcp = wc; 881 *wcp = wc;
872 882
873 return 0; 883 return 0;
@@ -1103,16 +1113,19 @@ out:
1103 * Prepare a single cluster for write one cluster into the file. 1113 * Prepare a single cluster for write one cluster into the file.
1104 */ 1114 */
1105static int ocfs2_write_cluster(struct address_space *mapping, 1115static int ocfs2_write_cluster(struct address_space *mapping,
1106 u32 phys, struct ocfs2_alloc_context *data_ac, 1116 u32 phys, unsigned int unwritten,
1117 struct ocfs2_alloc_context *data_ac,
1107 struct ocfs2_alloc_context *meta_ac, 1118 struct ocfs2_alloc_context *meta_ac,
1108 struct ocfs2_write_ctxt *wc, u32 cpos, 1119 struct ocfs2_write_ctxt *wc, u32 cpos,
1109 loff_t user_pos, unsigned user_len) 1120 loff_t user_pos, unsigned user_len)
1110{ 1121{
1111 int ret, i, new; 1122 int ret, i, new, should_zero = 0;
1112 u64 v_blkno, p_blkno; 1123 u64 v_blkno, p_blkno;
1113 struct inode *inode = mapping->host; 1124 struct inode *inode = mapping->host;
1114 1125
1115 new = phys == 0 ? 1 : 0; 1126 new = phys == 0 ? 1 : 0;
1127 if (new || unwritten)
1128 should_zero = 1;
1116 1129
1117 if (new) { 1130 if (new) {
1118 u32 tmp_pos; 1131 u32 tmp_pos;
@@ -1142,11 +1155,20 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1142 mlog_errno(ret); 1155 mlog_errno(ret);
1143 goto out; 1156 goto out;
1144 } 1157 }
1158 } else if (unwritten) {
1159 ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
1160 wc->w_handle, cpos, 1, phys,
1161 meta_ac, &wc->w_dealloc);
1162 if (ret < 0) {
1163 mlog_errno(ret);
1164 goto out;
1165 }
1166 }
1145 1167
1168 if (should_zero)
1146 v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos); 1169 v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
1147 } else { 1170 else
1148 v_blkno = user_pos >> inode->i_sb->s_blocksize_bits; 1171 v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
1149 }
1150 1172
1151 /* 1173 /*
1152 * The only reason this should fail is due to an inability to 1174 * The only reason this should fail is due to an inability to
@@ -1169,7 +1191,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1169 1191
1170 tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, 1192 tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
1171 wc->w_pages[i], cpos, 1193 wc->w_pages[i], cpos,
1172 user_pos, user_len, new); 1194 user_pos, user_len,
1195 should_zero);
1173 if (tmpret) { 1196 if (tmpret) {
1174 mlog_errno(tmpret); 1197 mlog_errno(tmpret);
1175 if (ret == 0) 1198 if (ret == 0)
@@ -1200,8 +1223,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1200 for (i = 0; i < wc->w_clen; i++) { 1223 for (i = 0; i < wc->w_clen; i++) {
1201 desc = &wc->w_desc[i]; 1224 desc = &wc->w_desc[i];
1202 1225
1203 ret = ocfs2_write_cluster(mapping, desc->c_phys, data_ac, 1226 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1204 meta_ac, wc, desc->c_cpos, pos, len); 1227 desc->c_unwritten, data_ac, meta_ac,
1228 wc, desc->c_cpos, pos, len);
1205 if (ret) { 1229 if (ret) {
1206 mlog_errno(ret); 1230 mlog_errno(ret);
1207 goto out; 1231 goto out;
@@ -1242,19 +1266,19 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1242 if (wc->w_large_pages) { 1266 if (wc->w_large_pages) {
1243 /* 1267 /*
1244 * We only care about the 1st and last cluster within 1268 * We only care about the 1st and last cluster within
1245 * our range and whether they are holes or not. Either 1269 * our range and whether they should be zero'd or not. Either
1246 * value may be extended out to the start/end of a 1270 * value may be extended out to the start/end of a
1247 * newly allocated cluster. 1271 * newly allocated cluster.
1248 */ 1272 */
1249 desc = &wc->w_desc[0]; 1273 desc = &wc->w_desc[0];
1250 if (desc->c_new) 1274 if (ocfs2_should_zero_cluster(desc))
1251 ocfs2_figure_cluster_boundaries(osb, 1275 ocfs2_figure_cluster_boundaries(osb,
1252 desc->c_cpos, 1276 desc->c_cpos,
1253 &wc->w_target_from, 1277 &wc->w_target_from,
1254 NULL); 1278 NULL);
1255 1279
1256 desc = &wc->w_desc[wc->w_clen - 1]; 1280 desc = &wc->w_desc[wc->w_clen - 1];
1257 if (desc->c_new) 1281 if (ocfs2_should_zero_cluster(desc))
1258 ocfs2_figure_cluster_boundaries(osb, 1282 ocfs2_figure_cluster_boundaries(osb,
1259 desc->c_cpos, 1283 desc->c_cpos,
1260 NULL, 1284 NULL,
@@ -1268,28 +1292,52 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1268/* 1292/*
1269 * Populate each single-cluster write descriptor in the write context 1293 * Populate each single-cluster write descriptor in the write context
1270 * with information about the i/o to be done. 1294 * with information about the i/o to be done.
1295 *
1296 * Returns the number of clusters that will have to be allocated, as
1297 * well as a worst case estimate of the number of extent records that
1298 * would have to be created during a write to an unwritten region.
1271 */ 1299 */
1272static int ocfs2_populate_write_desc(struct inode *inode, 1300static int ocfs2_populate_write_desc(struct inode *inode,
1273 struct ocfs2_write_ctxt *wc, 1301 struct ocfs2_write_ctxt *wc,
1274 unsigned int *clusters_to_alloc) 1302 unsigned int *clusters_to_alloc,
1303 unsigned int *extents_to_split)
1275{ 1304{
1276 int ret; 1305 int ret;
1277 struct ocfs2_write_cluster_desc *desc; 1306 struct ocfs2_write_cluster_desc *desc;
1278 unsigned int num_clusters = 0; 1307 unsigned int num_clusters = 0;
1308 unsigned int ext_flags = 0;
1279 u32 phys = 0; 1309 u32 phys = 0;
1280 int i; 1310 int i;
1281 1311
1312 *clusters_to_alloc = 0;
1313 *extents_to_split = 0;
1314
1282 for (i = 0; i < wc->w_clen; i++) { 1315 for (i = 0; i < wc->w_clen; i++) {
1283 desc = &wc->w_desc[i]; 1316 desc = &wc->w_desc[i];
1284 desc->c_cpos = wc->w_cpos + i; 1317 desc->c_cpos = wc->w_cpos + i;
1285 1318
1286 if (num_clusters == 0) { 1319 if (num_clusters == 0) {
1320 /*
1321 * Need to look up the next extent record.
1322 */
1287 ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, 1323 ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
1288 &num_clusters, NULL); 1324 &num_clusters, &ext_flags);
1289 if (ret) { 1325 if (ret) {
1290 mlog_errno(ret); 1326 mlog_errno(ret);
1291 goto out; 1327 goto out;
1292 } 1328 }
1329
1330 /*
1331 * Assume worst case - that we're writing in
1332 * the middle of the extent.
1333 *
1334 * We can assume that the write proceeds from
1335 * left to right, in which case the extent
1336 * insert code is smart enough to coalesce the
1337 * next splits into the previous records created.
1338 */
1339 if (ext_flags & OCFS2_EXT_UNWRITTEN)
1340 *extents_to_split = *extents_to_split + 2;
1293 } else if (phys) { 1341 } else if (phys) {
1294 /* 1342 /*
1295 * Only increment phys if it doesn't describe 1343 * Only increment phys if it doesn't describe
@@ -1303,6 +1351,8 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1303 desc->c_new = 1; 1351 desc->c_new = 1;
1304 *clusters_to_alloc = *clusters_to_alloc + 1; 1352 *clusters_to_alloc = *clusters_to_alloc + 1;
1305 } 1353 }
1354 if (ext_flags & OCFS2_EXT_UNWRITTEN)
1355 desc->c_unwritten = 1;
1306 1356
1307 num_clusters--; 1357 num_clusters--;
1308 } 1358 }
@@ -1318,7 +1368,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1318 struct buffer_head *di_bh, struct page *mmap_page) 1368 struct buffer_head *di_bh, struct page *mmap_page)
1319{ 1369{
1320 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1370 int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
1321 unsigned int clusters_to_alloc = 0; 1371 unsigned int clusters_to_alloc, extents_to_split;
1322 struct ocfs2_write_ctxt *wc; 1372 struct ocfs2_write_ctxt *wc;
1323 struct inode *inode = mapping->host; 1373 struct inode *inode = mapping->host;
1324 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1374 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -1333,7 +1383,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1333 return ret; 1383 return ret;
1334 } 1384 }
1335 1385
1336 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc); 1386 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1387 &extents_to_split);
1337 if (ret) { 1388 if (ret) {
1338 mlog_errno(ret); 1389 mlog_errno(ret);
1339 goto out; 1390 goto out;
@@ -1347,14 +1398,14 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1347 * write out. An allocation requires that we write the entire 1398 * write out. An allocation requires that we write the entire
1348 * cluster range. 1399 * cluster range.
1349 */ 1400 */
1350 if (clusters_to_alloc > 0) { 1401 if (clusters_to_alloc || extents_to_split) {
1351 /* 1402 /*
1352 * XXX: We are stretching the limits of 1403 * XXX: We are stretching the limits of
1353 * ocfs2_lock_allocators(). It greately over-estimates 1404 * ocfs2_lock_allocators(). It greatly over-estimates
1354 * the work to be done. 1405 * the work to be done.
1355 */ 1406 */
1356 ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, 1407 ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
1357 &data_ac, &meta_ac); 1408 extents_to_split, &data_ac, &meta_ac);
1358 if (ret) { 1409 if (ret) {
1359 mlog_errno(ret); 1410 mlog_errno(ret);
1360 goto out; 1411 goto out;
@@ -1365,7 +1416,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1365 1416
1366 } 1417 }
1367 1418
1368 ocfs2_set_target_boundaries(osb, wc, pos, len, clusters_to_alloc); 1419 ocfs2_set_target_boundaries(osb, wc, pos, len,
1420 clusters_to_alloc + extents_to_split);
1369 1421
1370 handle = ocfs2_start_trans(osb, credits); 1422 handle = ocfs2_start_trans(osb, credits);
1371 if (IS_ERR(handle)) { 1423 if (IS_ERR(handle)) {
@@ -1393,7 +1445,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1393 * extent. 1445 * extent.
1394 */ 1446 */
1395 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1447 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1396 clusters_to_alloc, mmap_page); 1448 clusters_to_alloc + extents_to_split,
1449 mmap_page);
1397 if (ret) { 1450 if (ret) {
1398 mlog_errno(ret); 1451 mlog_errno(ret);
1399 goto out_commit; 1452 goto out_commit;
@@ -1538,11 +1591,12 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1538 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 1591 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1539 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); 1592 di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
1540 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1593 di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1541
1542 ocfs2_journal_dirty(handle, wc->w_di_bh); 1594 ocfs2_journal_dirty(handle, wc->w_di_bh);
1543 1595
1544 ocfs2_commit_trans(osb, handle); 1596 ocfs2_commit_trans(osb, handle);
1545 1597
1598 ocfs2_run_deallocs(osb, &wc->w_dealloc);
1599
1546 ocfs2_free_write_ctxt(wc); 1600 ocfs2_free_write_ctxt(wc);
1547 1601
1548 return copied; 1602 return copied;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a80f31776d94..6745086da6fd 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -527,20 +527,21 @@ leave:
527 * understand sparse inodes. 527 * understand sparse inodes.
528 */ 528 */
529int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 529int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
530 u32 clusters_to_add, 530 u32 clusters_to_add, u32 extents_to_split,
531 struct ocfs2_alloc_context **data_ac, 531 struct ocfs2_alloc_context **data_ac,
532 struct ocfs2_alloc_context **meta_ac) 532 struct ocfs2_alloc_context **meta_ac)
533{ 533{
534 int ret, num_free_extents; 534 int ret, num_free_extents;
535 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
535 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 536 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
536 537
537 *meta_ac = NULL; 538 *meta_ac = NULL;
538 *data_ac = NULL; 539 *data_ac = NULL;
539 540
540 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " 541 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
541 "clusters_to_add = %u\n", 542 "clusters_to_add = %u, extents_to_split = %u\n",
542 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), 543 (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
543 le32_to_cpu(di->i_clusters), clusters_to_add); 544 le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
544 545
545 num_free_extents = ocfs2_num_free_extents(osb, inode, di); 546 num_free_extents = ocfs2_num_free_extents(osb, inode, di);
546 if (num_free_extents < 0) { 547 if (num_free_extents < 0) {
@@ -558,9 +559,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
558 * 559 *
559 * Most of the time we'll only be seeing this 1 cluster at a time 560 * Most of the time we'll only be seeing this 1 cluster at a time
560 * anyway. 561 * anyway.
562 *
563 * Always lock for any unwritten extents - we might want to
564 * add blocks during a split.
561 */ 565 */
562 if (!num_free_extents || 566 if (!num_free_extents ||
563 (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { 567 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
564 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); 568 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
565 if (ret < 0) { 569 if (ret < 0) {
566 if (ret != -ENOSPC) 570 if (ret != -ENOSPC)
@@ -641,7 +645,7 @@ restart_all:
641 down_write(&OCFS2_I(inode)->ip_alloc_sem); 645 down_write(&OCFS2_I(inode)->ip_alloc_sem);
642 drop_alloc_sem = 1; 646 drop_alloc_sem = 1;
643 647
644 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, 648 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
645 &meta_ac); 649 &meta_ac);
646 if (status) { 650 if (status) {
647 mlog_errno(status); 651 mlog_errno(status);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index a4dd1fa1822b..54df3c4bd2fd 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -47,7 +47,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
47 struct ocfs2_alloc_context *meta_ac, 47 struct ocfs2_alloc_context *meta_ac,
48 enum ocfs2_alloc_restarted *reason); 48 enum ocfs2_alloc_restarted *reason);
49int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 49int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
50 u32 clusters_to_add, 50 u32 clusters_to_add, u32 extents_to_split,
51 struct ocfs2_alloc_context **data_ac, 51 struct ocfs2_alloc_context **data_ac,
52 struct ocfs2_alloc_context **meta_ac); 52 struct ocfs2_alloc_context **meta_ac);
53int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 53int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);