aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/alloc.c
diff options
context:
space:
mode:
author Mark Fasheh <mark.fasheh@oracle.com> 2007-02-16 14:46:50 -0500
committer Mark Fasheh <mark.fasheh@oracle.com> 2007-04-26 18:02:20 -0400
commit 60b11392f1a09433740bda3048202213daa27736 (patch)
tree a8687fcb0ce62b130b732d663b54a984564d28b2 /fs/ocfs2/alloc.c
parent 25baf2da1473d9dcde1a4c7b0ab26e7d67d9bf62 (diff)
ocfs2: zero tail of sparse files on truncate
Since we don't zero on extend anymore, truncate needs to be fixed up to zero the part of a file between i_size and the end of its cluster. Otherwise a subsequent extend could expose bad data. This introduced a new helper, which can be used in ocfs2_write(). Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r-- fs/ocfs2/alloc.c 224
1 files changed, 224 insertions, 0 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9a40603c4d4b..98694a1add43 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -27,6 +27,7 @@
27#include <linux/types.h> 27#include <linux/types.h>
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/swap.h>
30 31
31#define MLOG_MASK_PREFIX ML_DISK_ALLOC 32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
32#include <cluster/masklog.h> 33#include <cluster/masklog.h>
@@ -34,6 +35,7 @@
34#include "ocfs2.h" 35#include "ocfs2.h"
35 36
36#include "alloc.h" 37#include "alloc.h"
38#include "aops.h"
37#include "dlmglue.h" 39#include "dlmglue.h"
38#include "extent_map.h" 40#include "extent_map.h"
39#include "inode.h" 41#include "inode.h"
@@ -3342,6 +3344,228 @@ bail:
3342 return status; 3344 return status;
3343} 3345}
3344 3346
3347static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh)
3348{
3349 set_buffer_uptodate(bh);
3350 mark_buffer_dirty(bh);
3351 return 0;
3352}
3353
3354static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
3355{
3356 set_buffer_uptodate(bh);
3357 mark_buffer_dirty(bh);
3358 return ocfs2_journal_dirty_data(handle, bh);
3359}
3360
3361static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t isize,
3362 struct page **pages, int numpages,
3363 u64 phys, handle_t *handle)
3364{
3365 int i, ret, partial = 0;
3366 void *kaddr;
3367 struct page *page;
3368 unsigned int from, to = PAGE_CACHE_SIZE;
3369 struct super_block *sb = inode->i_sb;
3370
3371 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
3372
3373 if (numpages == 0)
3374 goto out;
3375
3376 from = isize & (PAGE_CACHE_SIZE - 1); /* 1st page offset */
3377 if (PAGE_CACHE_SHIFT > OCFS2_SB(sb)->s_clustersize_bits) {
3378 /*
3379 * Since 'from' has been capped to a value below page
3380 * size, this calculation won't be able to overflow
3381 * 'to'
3382 */
3383 to = ocfs2_align_bytes_to_clusters(sb, from);
3384
3385 /*
3386 * The truncate tail in this case should never contain
3387 * more than one page at maximum. The loop below also
3388 * assumes this.
3389 */
3390 BUG_ON(numpages != 1);
3391 }
3392
3393 for(i = 0; i < numpages; i++) {
3394 page = pages[i];
3395
3396 BUG_ON(from > PAGE_CACHE_SIZE);
3397 BUG_ON(to > PAGE_CACHE_SIZE);
3398
3399 ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0);
3400 if (ret)
3401 mlog_errno(ret);
3402
3403 kaddr = kmap_atomic(page, KM_USER0);
3404 memset(kaddr + from, 0, to - from);
3405 kunmap_atomic(kaddr, KM_USER0);
3406
3407 /*
3408 * Need to set the buffers we zero'd into uptodate
3409 * here if they aren't - ocfs2_map_page_blocks()
3410 * might've skipped some
3411 */
3412 if (ocfs2_should_order_data(inode)) {
3413 ret = walk_page_buffers(handle,
3414 page_buffers(page),
3415 from, to, &partial,
3416 ocfs2_ordered_zero_func);
3417 if (ret < 0)
3418 mlog_errno(ret);
3419 } else {
3420 ret = walk_page_buffers(handle, page_buffers(page),
3421 from, to, &partial,
3422 ocfs2_writeback_zero_func);
3423 if (ret < 0)
3424 mlog_errno(ret);
3425 }
3426
3427 if (!partial)
3428 SetPageUptodate(page);
3429
3430 flush_dcache_page(page);
3431
3432 /*
3433 * Every page after the 1st one should be completely zero'd.
3434 */
3435 from = 0;
3436 }
3437out:
3438 if (pages) {
3439 for (i = 0; i < numpages; i++) {
3440 page = pages[i];
3441 unlock_page(page);
3442 mark_page_accessed(page);
3443 page_cache_release(page);
3444 }
3445 }
3446}
3447
3448static int ocfs2_grab_eof_pages(struct inode *inode, loff_t isize, struct page **pages,
3449 int *num, u64 *phys)
3450{
3451 int i, numpages = 0, ret = 0;
3452 unsigned int csize = OCFS2_SB(inode->i_sb)->s_clustersize;
3453 struct super_block *sb = inode->i_sb;
3454 struct address_space *mapping = inode->i_mapping;
3455 unsigned long index;
3456 u64 next_cluster_bytes;
3457
3458 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
3459
3460 /* Cluster boundary, so we don't need to grab any pages. */
3461 if ((isize & (csize - 1)) == 0)
3462 goto out;
3463
3464 ret = ocfs2_extent_map_get_blocks(inode, isize >> sb->s_blocksize_bits,
3465 phys, NULL);
3466 if (ret) {
3467 mlog_errno(ret);
3468 goto out;
3469 }
3470
3471 /* Tail is a hole. */
3472 if (*phys == 0)
3473 goto out;
3474
3475 next_cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, isize);
3476 index = isize >> PAGE_CACHE_SHIFT;
3477 do {
3478 pages[numpages] = grab_cache_page(mapping, index);
3479 if (!pages[numpages]) {
3480 ret = -ENOMEM;
3481 mlog_errno(ret);
3482 goto out;
3483 }
3484
3485 numpages++;
3486 index++;
3487 } while (index < (next_cluster_bytes >> PAGE_CACHE_SHIFT));
3488
3489out:
3490 if (ret != 0) {
3491 if (pages) {
3492 for (i = 0; i < numpages; i++) {
3493 if (pages[i]) {
3494 unlock_page(pages[i]);
3495 page_cache_release(pages[i]);
3496 }
3497 }
3498 }
3499 numpages = 0;
3500 }
3501
3502 *num = numpages;
3503
3504 return ret;
3505}
3506
3507/*
3508 * Zero the area past i_size but still within an allocated
3509 * cluster. This avoids exposing nonzero data on subsequent file
3510 * extends.
3511 *
3512 * We need to call this before i_size is updated on the inode because
3513 * otherwise block_write_full_page() will skip writeout of pages past
3514 * i_size. The new_i_size parameter is passed for this reason.
3515 */
3516int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
3517 u64 new_i_size)
3518{
3519 int ret, numpages;
3520 struct page **pages = NULL;
3521 u64 phys;
3522
3523 /*
3524 * File systems which don't support sparse files zero on every
3525 * extend.
3526 */
3527 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
3528 return 0;
3529
3530 pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb),
3531 sizeof(struct page *), GFP_NOFS);
3532 if (pages == NULL) {
3533 ret = -ENOMEM;
3534 mlog_errno(ret);
3535 goto out;
3536 }
3537
3538 ret = ocfs2_grab_eof_pages(inode, new_i_size, pages, &numpages, &phys);
3539 if (ret) {
3540 mlog_errno(ret);
3541 goto out;
3542 }
3543
3544 /*
3545 * Truncate on an i_size boundary - nothing more to do.
3546 */
3547 if (numpages == 0)
3548 goto out;
3549
3550 ocfs2_zero_cluster_pages(inode, new_i_size, pages, numpages, phys,
3551 handle);
3552
3553 /*
3554 * Initiate writeout of the pages we zero'd here. We don't
3555 * wait on them - the truncate_inode_pages() call later will
3556 * do that for us.
3557 */
3558 ret = filemap_fdatawrite(inode->i_mapping);
3559 if (ret)
3560 mlog_errno(ret);
3561
3562out:
3563 if (pages)
3564 kfree(pages);
3565
3566 return ret;
3567}
3568
3345/* 3569/*
3346 * It is expected, that by the time you call this function, 3570 * It is expected, that by the time you call this function,
3347 * inode->i_size and fe->i_size have been adjusted. 3571 * inode->i_size and fe->i_size have been adjusted.