diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-02-16 14:46:50 -0500 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-04-26 18:02:20 -0400 |
commit | 60b11392f1a09433740bda3048202213daa27736 (patch) | |
tree | a8687fcb0ce62b130b732d663b54a984564d28b2 /fs/ocfs2/alloc.c | |
parent | 25baf2da1473d9dcde1a4c7b0ab26e7d67d9bf62 (diff) |
ocfs2: zero tail of sparse files on truncate
Since we don't zero on extend anymore, truncate needs to be fixed up to zero
the part of a file between i_size and the end of its cluster. Otherwise a
subsequent extend could expose bad data.
This introduced a new helper, which can be used in ocfs2_write().
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r-- | fs/ocfs2/alloc.c | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 9a40603c4d4b..98694a1add43 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/swap.h> | ||
30 | 31 | ||
31 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
32 | #include <cluster/masklog.h> | 33 | #include <cluster/masklog.h> |
@@ -34,6 +35,7 @@ | |||
34 | #include "ocfs2.h" | 35 | #include "ocfs2.h" |
35 | 36 | ||
36 | #include "alloc.h" | 37 | #include "alloc.h" |
38 | #include "aops.h" | ||
37 | #include "dlmglue.h" | 39 | #include "dlmglue.h" |
38 | #include "extent_map.h" | 40 | #include "extent_map.h" |
39 | #include "inode.h" | 41 | #include "inode.h" |
@@ -3342,6 +3344,228 @@ bail: | |||
3342 | return status; | 3344 | return status; |
3343 | } | 3345 | } |
3344 | 3346 | ||
3347 | static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) | ||
3348 | { | ||
3349 | set_buffer_uptodate(bh); | ||
3350 | mark_buffer_dirty(bh); | ||
3351 | return 0; | ||
3352 | } | ||
3353 | |||
3354 | static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | ||
3355 | { | ||
3356 | set_buffer_uptodate(bh); | ||
3357 | mark_buffer_dirty(bh); | ||
3358 | return ocfs2_journal_dirty_data(handle, bh); | ||
3359 | } | ||
3360 | |||
3361 | static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize, | ||
3362 | struct page **pages, int numpages, | ||
3363 | u64 phys, handle_t *handle) | ||
3364 | { | ||
3365 | int i, ret, partial = 0; | ||
3366 | void *kaddr; | ||
3367 | struct page *page; | ||
3368 | unsigned int from, to = PAGE_CACHE_SIZE; | ||
3369 | struct super_block *sb = inode->i_sb; | ||
3370 | |||
3371 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); | ||
3372 | |||
3373 | if (numpages == 0) | ||
3374 | goto out; | ||
3375 | |||
3376 | from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */ | ||
3377 | if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) { | ||
3378 | /* | ||
3379 | * Since 'from' has been capped to a value below page | ||
3380 | * size, this calculation won't be able to overflow | ||
3381 | * 'to' | ||
3382 | */ | ||
3383 | to = ocfs2_align_bytes_to_clusters(sb, from); | ||
3384 | |||
3385 | /* | ||
3386 | * The truncate tail in this case should never contain | ||
3387 | * more than one page at maximum. The loop below also | ||
3388 | * assumes this. | ||
3389 | */ | ||
3390 | BUG_ON(numpages != 1); | ||
3391 | } | ||
3392 | |||
3393 | for(i = 0; i < numpages; i++) { | ||
3394 | page = pages[i]; | ||
3395 | |||
3396 | BUG_ON(from > PAGE_CACHE_SIZE); | ||
3397 | BUG_ON(to > PAGE_CACHE_SIZE); | ||
3398 | |||
3399 | ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0); | ||
3400 | if (ret) | ||
3401 | mlog_errno(ret); | ||
3402 | |||
3403 | kaddr = kmap_atomic(page, KM_USER0); | ||
3404 | memset(kaddr + from, 0, to - from); | ||
3405 | kunmap_atomic(kaddr, KM_USER0); | ||
3406 | |||
3407 | /* | ||
3408 | * Need to set the buffers we zero'd into uptodate | ||
3409 | * here if they aren't - ocfs2_map_page_blocks() | ||
3410 | * might've skipped some | ||
3411 | */ | ||
3412 | if (ocfs2_should_order_data(inode)) { | ||
3413 | ret = walk_page_buffers(handle, | ||
3414 | page_buffers(page), | ||
3415 | from, to, &partial, | ||
3416 | ocfs2_ordered_zero_func); | ||
3417 | if (ret < 0) | ||
3418 | mlog_errno(ret); | ||
3419 | } else { | ||
3420 | ret = walk_page_buffers(handle, page_buffers(page), | ||
3421 | from, to, &partial, | ||
3422 | ocfs2_writeback_zero_func); | ||
3423 | if (ret < 0) | ||
3424 | mlog_errno(ret); | ||
3425 | } | ||
3426 | |||
3427 | if (!partial) | ||
3428 | SetPageUptodate(page); | ||
3429 | |||
3430 | flush_dcache_page(page); | ||
3431 | |||
3432 | /* | ||
3433 | * Every page after the 1st one should be completely zero'd. | ||
3434 | */ | ||
3435 | from = 0; | ||
3436 | } | ||
3437 | out: | ||
3438 | if (pages) { | ||
3439 | for (i = 0; i < numpages; i++) { | ||
3440 | page = pages[i]; | ||
3441 | unlock_page(page); | ||
3442 | mark_page_accessed(page); | ||
3443 | page_cache_release(page); | ||
3444 | } | ||
3445 | } | ||
3446 | } | ||
3447 | |||
3448 | static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages, | ||
3449 | int *num, u64 *phys) | ||
3450 | { | ||
3451 | int i, numpages = 0, ret = 0; | ||
3452 | unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize; | ||
3453 | struct super_block *sb = inode->i_sb; | ||
3454 | struct address_space *mapping = inode->i_mapping; | ||
3455 | unsigned long index; | ||
3456 | u64 next_cluster_bytes; | ||
3457 | |||
3458 | BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); | ||
3459 | |||
3460 | /* Cluster boundary, so we don't need to grab any pages. */ | ||
3461 | if ((isize & (csize - 1)) == 0) | ||
3462 | goto out; | ||
3463 | |||
3464 | ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits, | ||
3465 | phys, NULL); | ||
3466 | if (ret) { | ||
3467 | mlog_errno(ret); | ||
3468 | goto out; | ||
3469 | } | ||
3470 | |||
3471 | /* Tail is a hole. */ | ||
3472 | if (*phys == 0) | ||
3473 | goto out; | ||
3474 | |||
3475 | next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize); | ||
3476 | index = isize >> PAGE_CACHE_SHIFT; | ||
3477 | do { | ||
3478 | pages[numpages] = grab_cache_page(mapping, index); | ||
3479 | if (!pages[numpages]) { | ||
3480 | ret = -ENOMEM; | ||
3481 | mlog_errno(ret); | ||
3482 | goto out; | ||
3483 | } | ||
3484 | |||
3485 | numpages++; | ||
3486 | index++; | ||
3487 | } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT)); | ||
3488 | |||
3489 | out: | ||
3490 | if (ret != 0) { | ||
3491 | if (pages) { | ||
3492 | for (i = 0; i < numpages; i++) { | ||
3493 | if (pages[i]) { | ||
3494 | unlock_page(pages[i]); | ||
3495 | page_cache_release(pages[i]); | ||
3496 | } | ||
3497 | } | ||
3498 | } | ||
3499 | numpages = 0; | ||
3500 | } | ||
3501 | |||
3502 | *num = numpages; | ||
3503 | |||
3504 | return ret; | ||
3505 | } | ||
3506 | |||
3507 | /* | ||
3508 | * Zero the area past i_size but still within an allocated | ||
3509 | * cluster. This avoids exposing nonzero data on subsequent file | ||
3510 | * extends. | ||
3511 | * | ||
3512 | * We need to call this before i_size is updated on the inode because | ||
3513 | * otherwise block_write_full_page() will skip writeout of pages past | ||
3514 | * i_size. The new_i_size parameter is passed for this reason. | ||
3515 | */ | ||
3516 | int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle, | ||
3517 | u64 new_i_size) | ||
3518 | { | ||
3519 | int ret, numpages; | ||
3520 | struct page **pages = NULL; | ||
3521 | u64 phys; | ||
3522 | |||
3523 | /* | ||
3524 | * File systems which don't support sparse files zero on every | ||
3525 | * extend. | ||
3526 | */ | ||
3527 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
3528 | return 0; | ||
3529 | |||
3530 | pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb), | ||
3531 | sizeof(struct page *), GFP_NOFS); | ||
3532 | if (pages == NULL) { | ||
3533 | ret = -ENOMEM; | ||
3534 | mlog_errno(ret); | ||
3535 | goto out; | ||
3536 | } | ||
3537 | |||
3538 | ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys); | ||
3539 | if (ret) { | ||
3540 | mlog_errno(ret); | ||
3541 | goto out; | ||
3542 | } | ||
3543 | |||
3544 | /* | ||
3545 | * Truncate on an i_size boundary - nothing more to do. | ||
3546 | */ | ||
3547 | if (numpages == 0) | ||
3548 | goto out; | ||
3549 | |||
3550 | ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys, | ||
3551 | handle); | ||
3552 | |||
3553 | /* | ||
3554 | * Initiate writeout of the pages we zero'd here. We don't | ||
3555 | * wait on them - the truncate_inode_pages() call later will | ||
3556 | * do that for us. | ||
3557 | */ | ||
3558 | ret = filemap_fdatawrite(inode->i_mapping); | ||
3559 | if (ret) | ||
3560 | mlog_errno(ret); | ||
3561 | |||
3562 | out: | ||
3563 | if (pages) | ||
3564 | kfree(pages); | ||
3565 | |||
3566 | return ret; | ||
3567 | } | ||
3568 | |||
3345 | /* | 3569 | /* |
3346 | * It is expected, that by the time you call this function, | 3570 | * It is expected, that by the time you call this function, |
3347 | * inode->i_size and fe->i_size have been adjusted. | 3571 | * inode->i_size and fe->i_size have been adjusted. |