aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2007-02-16 14:46:50 -0500
committerMark Fasheh <mark.fasheh@oracle.com>2007-04-26 18:02:20 -0400
commit60b11392f1a09433740bda3048202213daa27736 (patch)
treea8687fcb0ce62b130b732d663b54a984564d28b2 /fs
parent25baf2da1473d9dcde1a4c7b0ab26e7d67d9bf62 (diff)
ocfs2: zero tail of sparse files on truncate
Since we don't zero on extend anymore, truncate needs to be fixed up to zero the part of a file between i_size and the end of its cluster. Otherwise a subsequent extend could expose bad data. This introduced a new helper, which can be used in ocfs2_write(). Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/alloc.c224
-rw-r--r--fs/ocfs2/alloc.h2
-rw-r--r--fs/ocfs2/aops.c34
-rw-r--r--fs/ocfs2/aops.h12
-rw-r--r--fs/ocfs2/file.c40
-rw-r--r--fs/ocfs2/inode.c30
-rw-r--r--fs/ocfs2/ocfs2.h11
7 files changed, 328 insertions, 25 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9a40603c4d4b..98694a1add43 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -27,6 +27,7 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/swap.h>
30 31
31#define MLOG_MASK_PREFIX ML_DISK_ALLOC 32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
32#include <cluster/masklog.h> 33#include <cluster/masklog.h>
@@ -34,6 +35,7 @@
34#include "ocfs2.h" 35#include "ocfs2.h"
35 36
36#include "alloc.h" 37#include "alloc.h"
38#include "aops.h"
37#include "dlmglue.h" 39#include "dlmglue.h"
38#include "extent_map.h" 40#include "extent_map.h"
39#include "inode.h" 41#include "inode.h"
@@ -3342,6 +3344,228 @@ bail:
3342 return status; 3344 return status;
3343} 3345}
3344 3346
/*
 * Per-buffer callback handed to walk_page_buffers() by
 * ocfs2_zero_cluster_pages() when ocfs2_should_order_data() is false.
 *
 * Marks the buffer uptodate and dirty. The journal handle is accepted
 * only to match the callback signature and is not used here.
 *
 * Always returns 0.
 */
3347static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh)
3348{
3349 set_buffer_uptodate(bh);
3350 mark_buffer_dirty(bh);
3351 return 0;
3352}
3353
/*
 * Per-buffer callback handed to walk_page_buffers() by
 * ocfs2_zero_cluster_pages() when ocfs2_should_order_data() is true.
 *
 * Marks the buffer uptodate and dirty, then passes it to
 * ocfs2_journal_dirty_data() together with the journal handle.
 *
 * Returns whatever ocfs2_journal_dirty_data() returns.
 */
3354static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
3355{
3356 set_buffer_uptodate(bh);
3357 mark_buffer_dirty(bh);
3358 return ocfs2_journal_dirty_data(handle, bh);
3359}
3360
/*
 * Zero the byte range from isize to the end of its cluster in the
 * supplied page cache pages, and dirty the buffers that back it.
 *
 * inode:    file being truncated; its filesystem must support sparse
 *           allocation (enforced with BUG_ON).
 * isize:    the new i_size; only its offset within a page is used
 *           here, as the starting offset in pages[0].
 * pages:    array of numpages pages covering the tail of the cluster.
 * numpages: number of valid entries in pages.
 * phys:     physical block number passed by reference to
 *           ocfs2_map_page_blocks() for every page.
 * handle:   journal handle forwarded to the per-buffer callbacks.
 *
 * Every page in pages[0..numpages) is unlocked, marked accessed and
 * released before returning, so the caller must not touch them
 * afterwards. Errors are logged with mlog_errno() but never returned.
 */
3361static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize,
3362 struct page **pages, int numpages,
3363 u64 phys, handle_t *handle)
3364{
3365 int i, ret, partial = 0;
3366 void *kaddr;
3367 struct page *page;
3368 unsigned int from, to = PAGE_CACHE_SIZE;
3369 struct super_block *sb = inode->i_sb;
3370
3371 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
3372
3373 if (numpages == 0)
3374 goto out;
3375
3376 from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */
3377 if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) {
3378 /*
3379 * Since 'from' has been capped to a value below page
3380 * size, this calculation won't be able to overflow
3381 * 'to'
3382 */
3383 to = ocfs2_align_bytes_to_clusters(sb, from);
3384
3385 /*
3386 * The truncate tail in this case should never contain
3387 * more than one page at maximum. The loop below also
3388 * assumes this.
3389 */
3390 BUG_ON(numpages != 1);
3391 }
3392
3393 for(i = 0; i < numpages; i++) {
3394 page = pages[i];
3395
3396 BUG_ON(from > PAGE_CACHE_SIZE);
3397 BUG_ON(to > PAGE_CACHE_SIZE);
3398
/*
 * NOTE(review): a mapping failure is only logged; the page is still
 * zeroed and its buffers dirtied below. Confirm this best-effort
 * behaviour is intended.
 */
3399 ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0);
3400 if (ret)
3401 mlog_errno(ret);
3402
3403 kaddr = kmap_atomic(page, KM_USER0);
3404 memset(kaddr + from, 0, to - from);
3405 kunmap_atomic(kaddr, KM_USER0);
3406
3407 /*
3408 * Need to set the buffers we zero'd into uptodate
3409 * here if they aren't - ocfs2_map_page_blocks()
3410 * might've skipped some
3411 */
3412 if (ocfs2_should_order_data(inode)) {
3413 ret = walk_page_buffers(handle,
3414 page_buffers(page),
3415 from, to, &partial,
3416 ocfs2_ordered_zero_func);
3417 if (ret < 0)
3418 mlog_errno(ret);
3419 } else {
3420 ret = walk_page_buffers(handle, page_buffers(page),
3421 from, to, &partial,
3422 ocfs2_writeback_zero_func);
3423 if (ret < 0)
3424 mlog_errno(ret);
3425 }
3426
/*
 * NOTE(review): 'partial' is initialised once before the loop and
 * never reset, so if walk_page_buffers() sets it for one page it
 * stays set for all later pages - verify that is intended.
 */
3427 if (!partial)
3428 SetPageUptodate(page);
3429
3430 flush_dcache_page(page);
3431
3432 /*
3433 * Every page after the 1st one should be completely zero'd.
3434 */
3435 from = 0;
3436 }
3437out:
/* Drop the lock and the reference on every page handed to us. */
3438 if (pages) {
3439 for (i = 0; i < numpages; i++) {
3440 page = pages[i];
3441 unlock_page(page);
3442 mark_page_accessed(page);
3443 page_cache_release(page);
3444 }
3445 }
3446}
3447
/*
 * Collect the page cache pages that cover the range from isize to
 * the end of the cluster containing isize.
 *
 * inode: file whose tail is being examined; its filesystem must
 *        support sparse allocation (enforced with BUG_ON).
 * isize: byte offset at which the tail starts.
 * pages: caller-supplied array that receives the grabbed pages. No
 *        bound is checked here; the visible caller sizes it with
 *        ocfs2_pages_per_cluster().
 * num:   out - number of pages stored in pages.
 * phys:  out - filled in by ocfs2_extent_map_get_blocks() for the
 *        block containing isize. Not written when isize is already
 *        cluster aligned.
 *
 * Returns 0 on success. *num is 0 when isize sits on a cluster
 * boundary or when the tail is a hole (*phys == 0). On failure a
 * negative errno is returned, every page grabbed so far is unlocked
 * and released, and *num is set to 0.
 *
 * Pages are obtained with grab_cache_page() and are unlocked on the
 * error path, so they are presumably returned locked - TODO confirm.
 */
3448static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages,
3449 int *num, u64 *phys)
3450{
3451 int i, numpages = 0, ret = 0;
3452 unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize;
3453 struct super_block *sb = inode->i_sb;
3454 struct address_space *mapping = inode->i_mapping;
3455 unsigned long index;
3456 u64 next_cluster_bytes;
3457
3458 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
3459
3460 /* Cluster boundary, so we don't need to grab any pages. */
3461 if ((isize & (csize - 1)) == 0)
3462 goto out;
3463
3464 ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits,
3465 phys, NULL);
3466 if (ret) {
3467 mlog_errno(ret);
3468 goto out;
3469 }
3470
3471 /* Tail is a hole. */
3472 if (*phys == 0)
3473 goto out;
3474
/*
 * Walk page indexes from the page holding isize up to (but not
 * including) the page index of the next cluster boundary. The
 * do/while form always grabs at least one page.
 */
3475 next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize);
3476 index = isize >> PAGE_CACHE_SHIFT;
3477 do {
3478 pages[numpages] = grab_cache_page(mapping, index);
3479 if (!pages[numpages]) {
3480 ret = -ENOMEM;
3481 mlog_errno(ret);
3482 goto out;
3483 }
3484
3485 numpages++;
3486 index++;
3487 } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT));
3488
3489out:
/* On failure, hand back nothing: undo every page grabbed so far. */
3490 if (ret != 0) {
3491 if (pages) {
3492 for (i = 0; i < numpages; i++) {
3493 if (pages[i]) {
3494 unlock_page(pages[i]);
3495 page_cache_release(pages[i]);
3496 }
3497 }
3498 }
3499 numpages = 0;
3500 }
3501
3502 *num = numpages;
3503
3504 return ret;
3505}
3506
3507/*
3508 * Zero the area past i_size but still within an allocated
3509 * cluster. This avoids exposing nonzero data on subsequent file
3510 * extends.
3511 *
3512 * We need to call this before i_size is updated on the inode because
3513 * otherwise block_write_full_page() will skip writeout of pages past
3514 * i_size. The new_i_size parameter is passed for this reason.
3515 */
/*
 * inode:      file being truncated.
 * handle:     journal handle forwarded to ocfs2_zero_cluster_pages().
 * new_i_size: the size the file is about to be truncated to.
 *
 * Returns 0 without doing anything when the filesystem does not
 * support sparse allocation, and 0 when there are no tail pages to
 * zero. Returns -ENOMEM if the temporary page array cannot be
 * allocated, the error from ocfs2_grab_eof_pages() if that fails,
 * and otherwise the result of filemap_fdatawrite().
 */
3516int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
3517 u64 new_i_size)
3518{
3519 int ret, numpages;
3520 struct page **pages = NULL;
3521 u64 phys;
3522
3523 /*
3524 * File systems which don't support sparse files zero on every
3525 * extend.
3526 */
3527 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
3528 return 0;
3529
/* One slot per page in a cluster, the array size this helper hands to ocfs2_grab_eof_pages(). */
3530 pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb),
3531 sizeof(struct page *), GFP_NOFS);
3532 if (pages == NULL) {
3533 ret = -ENOMEM;
3534 mlog_errno(ret);
3535 goto out;
3536 }
3537
3538 ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys);
3539 if (ret) {
3540 mlog_errno(ret);
3541 goto out;
3542 }
3543
3544 /*
3545 * Truncate on an i_size boundary - nothing more to do.
3546 */
3547 if (numpages == 0)
3548 goto out;
3549
/* This call unlocks and releases the grabbed pages; only the array itself is left to free. */
3550 ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys,
3551 handle);
3552
3553 /*
3554 * Initiate writeout of the pages we zero'd here. We don't
3555 * wait on them - the truncate_inode_pages() call later will
3556 * do that for us.
3557 */
3558 ret = filemap_fdatawrite(inode->i_mapping);
3559 if (ret)
3560 mlog_errno(ret);
3561
3562out:
3563 if (pages)
3564 kfree(pages);
3565
3566 return ret;
3567}
3568
3345/* 3569/*
3346 * It is expected, that by the time you call this function, 3570 * It is expected, that by the time you call this function,
3347 * inode->i_size and fe->i_size have been adjusted. 3571 * inode->i_size and fe->i_size have been adjusted.
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index bff2a162b030..3cb39cd5e478 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -71,6 +71,8 @@ struct ocfs2_truncate_context {
71 struct buffer_head *tc_last_eb_bh; 71 struct buffer_head *tc_last_eb_bh;
72}; 72};
73 73
74int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
75 u64 new_i_size);
74int ocfs2_prepare_truncate(struct ocfs2_super *osb, 76int ocfs2_prepare_truncate(struct ocfs2_super *osb,
75 struct inode *inode, 77 struct inode *inode,
76 struct buffer_head *fe_bh, 78 struct buffer_head *fe_bh,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index acf8f0006725..605c82a93f01 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -308,13 +308,13 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
308 * functionality yet, but IMHO it's better to cut and paste the whole 308 * functionality yet, but IMHO it's better to cut and paste the whole
309 * thing so we can avoid introducing our own bugs (and easily pick up 309 * thing so we can avoid introducing our own bugs (and easily pick up
310 * their fixes when they happen) --Mark */ 310 * their fixes when they happen) --Mark */
311static int walk_page_buffers( handle_t *handle, 311int walk_page_buffers( handle_t *handle,
312 struct buffer_head *head, 312 struct buffer_head *head,
313 unsigned from, 313 unsigned from,
314 unsigned to, 314 unsigned to,
315 int *partial, 315 int *partial,
316 int (*fn)( handle_t *handle, 316 int (*fn)( handle_t *handle,
317 struct buffer_head *bh)) 317 struct buffer_head *bh))
318{ 318{
319 struct buffer_head *bh; 319 struct buffer_head *bh;
320 unsigned block_start, block_end; 320 unsigned block_start, block_end;
@@ -654,9 +654,9 @@ static void ocfs2_clear_page_regions(struct page *page,
654 * 654 *
655 * This will also skip zeroing, which is handled externally. 655 * This will also skip zeroing, which is handled externally.
656 */ 656 */
657static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, 657int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
658 struct inode *inode, unsigned int from, 658 struct inode *inode, unsigned int from,
659 unsigned int to, int new) 659 unsigned int to, int new)
660{ 660{
661 int ret = 0; 661 int ret = 0;
662 struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; 662 struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
@@ -675,8 +675,7 @@ static int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
675 * Ignore blocks outside of our i/o range - 675 * Ignore blocks outside of our i/o range -
676 * they may belong to unallocated clusters. 676 * they may belong to unallocated clusters.
677 */ 677 */
678 if (block_start >= to || 678 if (block_start >= to || block_end <= from) {
679 (block_start + bsize) <= from) {
680 if (PageUptodate(page)) 679 if (PageUptodate(page))
681 set_buffer_uptodate(bh); 680 set_buffer_uptodate(bh);
682 continue; 681 continue;
@@ -971,7 +970,6 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
971 u64 v_blkno, p_blkno; 970 u64 v_blkno, p_blkno;
972 struct address_space *mapping = file->f_mapping; 971 struct address_space *mapping = file->f_mapping;
973 struct inode *inode = mapping->host; 972 struct inode *inode = mapping->host;
974 unsigned int cbits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
975 unsigned long index, start; 973 unsigned long index, start;
976 struct page **cpages; 974 struct page **cpages;
977 975
@@ -979,13 +977,11 @@ static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
979 977
980 /* 978 /*
981 * Figure out how many pages we'll be manipulating here. For 979 * Figure out how many pages we'll be manipulating here. For
982 * non-allocating write, or any writes where cluster size is 980 * non allocating write, we just change the one
983 * less than page size, we only need one page. Otherwise, 981 * page. Otherwise, we'll need a whole clusters worth.
984 * allocating writes of cluster size larger than page size
985 * need cluster size pages.
986 */ 982 */
987 if (new && !wc->w_large_pages) 983 if (new)
988 numpages = (1 << cbits) / PAGE_SIZE; 984 numpages = ocfs2_pages_per_cluster(inode->i_sb);
989 985
990 cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); 986 cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
991 if (!cpages) { 987 if (!cpages) {
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index eeb2c42483e8..7d94071f0ab7 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -30,6 +30,18 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
30 unsigned from, 30 unsigned from,
31 unsigned to); 31 unsigned to);
32 32
33int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
34 struct inode *inode, unsigned int from,
35 unsigned int to, int new);
36
37int walk_page_buffers( handle_t *handle,
38 struct buffer_head *head,
39 unsigned from,
40 unsigned to,
41 int *partial,
42 int (*fn)( handle_t *handle,
43 struct buffer_head *bh));
44
33struct ocfs2_write_ctxt; 45struct ocfs2_write_ctxt;
34typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, 46typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
35 u64 *, unsigned int *, unsigned int *); 47 u64 *, unsigned int *, unsigned int *);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 667e5a869bf5..5fd49ec169dc 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -262,6 +262,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
262{ 262{
263 int status; 263 int status;
264 handle_t *handle; 264 handle_t *handle;
265 struct ocfs2_dinode *di;
265 266
266 mlog_entry_void(); 267 mlog_entry_void();
267 268
@@ -275,12 +276,39 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
275 goto out; 276 goto out;
276 } 277 }
277 278
278 status = ocfs2_set_inode_size(handle, inode, fe_bh, new_i_size); 279 status = ocfs2_journal_access(handle, inode, fe_bh,
280 OCFS2_JOURNAL_ACCESS_WRITE);
281 if (status < 0) {
282 mlog_errno(status);
283 goto out_commit;
284 }
285
286 /*
287 * Do this before setting i_size.
288 */
289 status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size);
290 if (status) {
291 mlog_errno(status);
292 goto out_commit;
293 }
294
295 i_size_write(inode, new_i_size);
296 inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size);
297 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
298
299 di = (struct ocfs2_dinode *) fe_bh->b_data;
300 di->i_size = cpu_to_le64(new_i_size);
301 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
302 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
303
304 status = ocfs2_journal_dirty(handle, fe_bh);
279 if (status < 0) 305 if (status < 0)
280 mlog_errno(status); 306 mlog_errno(status);
281 307
308out_commit:
282 ocfs2_commit_trans(osb, handle); 309 ocfs2_commit_trans(osb, handle);
283out: 310out:
311
284 mlog_exit(status); 312 mlog_exit(status);
285 return status; 313 return status;
286} 314}
@@ -343,7 +371,6 @@ static int ocfs2_truncate_file(struct inode *inode,
343 mlog_errno(status); 371 mlog_errno(status);
344 goto bail; 372 goto bail;
345 } 373 }
346 ocfs2_data_unlock(inode, 1);
347 374
348 /* alright, we're going to need to do a full blown alloc size 375 /* alright, we're going to need to do a full blown alloc size
349 * change. Orphan the inode so that recovery can complete the 376 * change. Orphan the inode so that recovery can complete the
@@ -352,22 +379,25 @@ static int ocfs2_truncate_file(struct inode *inode,
352 status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size); 379 status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
353 if (status < 0) { 380 if (status < 0) {
354 mlog_errno(status); 381 mlog_errno(status);
355 goto bail; 382 goto bail_unlock_data;
356 } 383 }
357 384
358 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 385 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc);
359 if (status < 0) { 386 if (status < 0) {
360 mlog_errno(status); 387 mlog_errno(status);
361 goto bail; 388 goto bail_unlock_data;
362 } 389 }
363 390
364 status = ocfs2_commit_truncate(osb, inode, di_bh, tc); 391 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
365 if (status < 0) { 392 if (status < 0) {
366 mlog_errno(status); 393 mlog_errno(status);
367 goto bail; 394 goto bail_unlock_data;
368 } 395 }
369 396
370 /* TODO: orphan dir cleanup here. */ 397 /* TODO: orphan dir cleanup here. */
398bail_unlock_data:
399 ocfs2_data_unlock(inode, 1);
400
371bail: 401bail:
372 402
373 mlog_exit(status); 403 mlog_exit(status);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0bd86a137591..78c99b5050df 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -489,12 +489,38 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
489 int status = 0; 489 int status = 0;
490 struct ocfs2_truncate_context *tc = NULL; 490 struct ocfs2_truncate_context *tc = NULL;
491 struct ocfs2_dinode *fe; 491 struct ocfs2_dinode *fe;
492 handle_t *handle = NULL;
492 493
493 mlog_entry_void(); 494 mlog_entry_void();
494 495
495 fe = (struct ocfs2_dinode *) fe_bh->b_data; 496 fe = (struct ocfs2_dinode *) fe_bh->b_data;
496 497
497 if (fe->i_clusters) { 498 if (fe->i_clusters) {
499 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
500 if (IS_ERR(handle)) {
501 status = PTR_ERR(handle);
502 mlog_errno(status);
503 goto out;
504 }
505
506 status = ocfs2_journal_access(handle, inode, fe_bh,
507 OCFS2_JOURNAL_ACCESS_WRITE);
508 if (status < 0) {
509 mlog_errno(status);
510 goto out;
511 }
512
513 i_size_write(inode, 0);
514
515 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
516 if (status < 0) {
517 mlog_errno(status);
518 goto out;
519 }
520
521 ocfs2_commit_trans(osb, handle);
522 handle = NULL;
523
498 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 524 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc);
499 if (status < 0) { 525 if (status < 0) {
500 mlog_errno(status); 526 mlog_errno(status);
@@ -507,8 +533,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
507 goto out; 533 goto out;
508 } 534 }
509 } 535 }
510out:
511 536
537out:
538 if (handle)
539 ocfs2_commit_trans(osb, handle);
512 mlog_exit(status); 540 mlog_exit(status);
513 return status; 541 return status;
514} 542}
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 2699f7cac21a..82cc92dcf8a6 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -495,6 +495,17 @@ static inline unsigned long ocfs2_align_clusters_to_page_index(struct super_bloc
495 return index; 495 return index;
496} 496}
497 497
/*
 * Number of page cache pages needed to cover one cluster on this
 * filesystem.
 *
 * Returns 1 when the cluster size shift (s_clustersize_bits) is less
 * than or equal to PAGE_CACHE_SHIFT, otherwise
 * 1 << (s_clustersize_bits - PAGE_CACHE_SHIFT).
 */
498static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
499{
500 unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
501 unsigned int pages_per_cluster = 1;
502
503 if (PAGE_CACHE_SHIFT < cbits)
504 pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
505
506 return pages_per_cluster;
507}
508
498#define ocfs2_set_bit ext2_set_bit 509#define ocfs2_set_bit ext2_set_bit
499#define ocfs2_clear_bit ext2_clear_bit 510#define ocfs2_clear_bit ext2_clear_bit
500#define ocfs2_test_bit ext2_test_bit 511#define ocfs2_test_bit ext2_test_bit