diff options
author | Nick Piggin <npiggin@suse.de> | 2007-10-16 04:25:25 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-10-16 12:42:58 -0400 |
commit | 03158cd7eb3374843de68421142ca5900df845d9 (patch) | |
tree | 5e1bfc1f981651e7f7d7cf30afd15d7bd96b2a57 /fs | |
parent | b6af1bcd8720cb3062c8c4d4c8ba02bee10ff03f (diff) |
fs: restore nobh
Implement nobh in the new aops (write_begin/write_end). This is a bit tricky.
FWIW, nobh_truncate_page is now implemented in a way that does not create blocks
in sparse regions, which is a silly thing for it to have been doing (isn't it?)
ext2 survives fsx and fsstress. jfs is converted as well. ext3
should be easy to do, but that has not been done yet.
[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 229 | ||||
-rw-r--r-- | fs/ext2/inode.c | 20 | ||||
-rw-r--r-- | fs/jfs/inode.c | 7 |
3 files changed, 171 insertions, 85 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index a89d25bf1171..a406cfd89e3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2369,7 +2369,7 @@ out_unlock: | |||
2369 | } | 2369 | } |
2370 | 2370 | ||
2371 | /* | 2371 | /* |
2372 | * nobh_prepare_write()'s prereads are special: the buffer_heads are freed | 2372 | * nobh_write_begin()'s prereads are special: the buffer_heads are freed |
2373 | * immediately, while under the page lock. So it needs a special end_io | 2373 | * immediately, while under the page lock. So it needs a special end_io |
2374 | * handler which does not touch the bh after unlocking it. | 2374 | * handler which does not touch the bh after unlocking it. |
2375 | */ | 2375 | */ |
@@ -2379,16 +2379,45 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) | |||
2379 | } | 2379 | } |
2380 | 2380 | ||
2381 | /* | 2381 | /* |
2382 | * Attach the singly-linked list of buffers created by nobh_write_begin, to | ||
2383 | * the page (converting it to circular linked list and taking care of page | ||
2384 | * dirty races). | ||
2385 | */ | ||
2386 | static void attach_nobh_buffers(struct page *page, struct buffer_head *head) | ||
2387 | { | ||
2388 | struct buffer_head *bh; | ||
2389 | |||
2390 | BUG_ON(!PageLocked(page)); | ||
2391 | |||
2392 | spin_lock(&page->mapping->private_lock); | ||
2393 | bh = head; | ||
2394 | do { | ||
2395 | if (PageDirty(page)) | ||
2396 | set_buffer_dirty(bh); | ||
2397 | if (!bh->b_this_page) | ||
2398 | bh->b_this_page = head; | ||
2399 | bh = bh->b_this_page; | ||
2400 | } while (bh != head); | ||
2401 | attach_page_buffers(page, head); | ||
2402 | spin_unlock(&page->mapping->private_lock); | ||
2403 | } | ||
2404 | |||
2405 | /* | ||
2382 | * On entry, the page is fully not uptodate. | 2406 | * On entry, the page is fully not uptodate. |
2383 | * On exit the page is fully uptodate in the areas outside (from,to) | 2407 | * On exit the page is fully uptodate in the areas outside (from,to) |
2384 | */ | 2408 | */ |
2385 | int nobh_prepare_write(struct page *page, unsigned from, unsigned to, | 2409 | int nobh_write_begin(struct file *file, struct address_space *mapping, |
2410 | loff_t pos, unsigned len, unsigned flags, | ||
2411 | struct page **pagep, void **fsdata, | ||
2386 | get_block_t *get_block) | 2412 | get_block_t *get_block) |
2387 | { | 2413 | { |
2388 | struct inode *inode = page->mapping->host; | 2414 | struct inode *inode = mapping->host; |
2389 | const unsigned blkbits = inode->i_blkbits; | 2415 | const unsigned blkbits = inode->i_blkbits; |
2390 | const unsigned blocksize = 1 << blkbits; | 2416 | const unsigned blocksize = 1 << blkbits; |
2391 | struct buffer_head *head, *bh; | 2417 | struct buffer_head *head, *bh; |
2418 | struct page *page; | ||
2419 | pgoff_t index; | ||
2420 | unsigned from, to; | ||
2392 | unsigned block_in_page; | 2421 | unsigned block_in_page; |
2393 | unsigned block_start, block_end; | 2422 | unsigned block_start, block_end; |
2394 | sector_t block_in_file; | 2423 | sector_t block_in_file; |
@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, | |||
2397 | int ret = 0; | 2426 | int ret = 0; |
2398 | int is_mapped_to_disk = 1; | 2427 | int is_mapped_to_disk = 1; |
2399 | 2428 | ||
2400 | if (page_has_buffers(page)) | 2429 | index = pos >> PAGE_CACHE_SHIFT; |
2401 | return block_prepare_write(page, from, to, get_block); | 2430 | from = pos & (PAGE_CACHE_SIZE - 1); |
2431 | to = from + len; | ||
2432 | |||
2433 | page = __grab_cache_page(mapping, index); | ||
2434 | if (!page) | ||
2435 | return -ENOMEM; | ||
2436 | *pagep = page; | ||
2437 | *fsdata = NULL; | ||
2438 | |||
2439 | if (page_has_buffers(page)) { | ||
2440 | unlock_page(page); | ||
2441 | page_cache_release(page); | ||
2442 | *pagep = NULL; | ||
2443 | return block_write_begin(file, mapping, pos, len, flags, pagep, | ||
2444 | fsdata, get_block); | ||
2445 | } | ||
2402 | 2446 | ||
2403 | if (PageMappedToDisk(page)) | 2447 | if (PageMappedToDisk(page)) |
2404 | return 0; | 2448 | return 0; |
@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, | |||
2413 | * than the circular one we're used to. | 2457 | * than the circular one we're used to. |
2414 | */ | 2458 | */ |
2415 | head = alloc_page_buffers(page, blocksize, 0); | 2459 | head = alloc_page_buffers(page, blocksize, 0); |
2416 | if (!head) | 2460 | if (!head) { |
2417 | return -ENOMEM; | 2461 | ret = -ENOMEM; |
2462 | goto out_release; | ||
2463 | } | ||
2418 | 2464 | ||
2419 | block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); | 2465 | block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); |
2420 | 2466 | ||
@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to, | |||
2483 | if (is_mapped_to_disk) | 2529 | if (is_mapped_to_disk) |
2484 | SetPageMappedToDisk(page); | 2530 | SetPageMappedToDisk(page); |
2485 | 2531 | ||
2486 | do { | 2532 | *fsdata = head; /* to be released by nobh_write_end */ |
2487 | bh = head; | ||
2488 | head = head->b_this_page; | ||
2489 | free_buffer_head(bh); | ||
2490 | } while (head); | ||
2491 | 2533 | ||
2492 | return 0; | 2534 | return 0; |
2493 | 2535 | ||
2494 | failed: | 2536 | failed: |
2537 | BUG_ON(!ret); | ||
2495 | /* | 2538 | /* |
2496 | * Error recovery is a bit difficult. We need to zero out blocks that | 2539 | * Error recovery is a bit difficult. We need to zero out blocks that |
2497 | * were newly allocated, and dirty them to ensure they get written out. | 2540 | * were newly allocated, and dirty them to ensure they get written out. |
@@ -2499,64 +2542,57 @@ failed: | |||
2499 | * the handling of potential IO errors during writeout would be hard | 2542 | * the handling of potential IO errors during writeout would be hard |
2500 | * (could try doing synchronous writeout, but what if that fails too?) | 2543 | * (could try doing synchronous writeout, but what if that fails too?) |
2501 | */ | 2544 | */ |
2502 | spin_lock(&page->mapping->private_lock); | 2545 | attach_nobh_buffers(page, head); |
2503 | bh = head; | 2546 | page_zero_new_buffers(page, from, to); |
2504 | block_start = 0; | ||
2505 | do { | ||
2506 | if (PageUptodate(page)) | ||
2507 | set_buffer_uptodate(bh); | ||
2508 | if (PageDirty(page)) | ||
2509 | set_buffer_dirty(bh); | ||
2510 | 2547 | ||
2511 | block_end = block_start+blocksize; | 2548 | out_release: |
2512 | if (block_end <= from) | 2549 | unlock_page(page); |
2513 | goto next; | 2550 | page_cache_release(page); |
2514 | if (block_start >= to) | 2551 | *pagep = NULL; |
2515 | goto next; | ||
2516 | 2552 | ||
2517 | if (buffer_new(bh)) { | 2553 | if (pos + len > inode->i_size) |
2518 | clear_buffer_new(bh); | 2554 | vmtruncate(inode, inode->i_size); |
2519 | if (!buffer_uptodate(bh)) { | ||
2520 | zero_user_page(page, block_start, bh->b_size, KM_USER0); | ||
2521 | set_buffer_uptodate(bh); | ||
2522 | } | ||
2523 | mark_buffer_dirty(bh); | ||
2524 | } | ||
2525 | next: | ||
2526 | block_start = block_end; | ||
2527 | if (!bh->b_this_page) | ||
2528 | bh->b_this_page = head; | ||
2529 | bh = bh->b_this_page; | ||
2530 | } while (bh != head); | ||
2531 | attach_page_buffers(page, head); | ||
2532 | spin_unlock(&page->mapping->private_lock); | ||
2533 | 2555 | ||
2534 | return ret; | 2556 | return ret; |
2535 | } | 2557 | } |
2536 | EXPORT_SYMBOL(nobh_prepare_write); | 2558 | EXPORT_SYMBOL(nobh_write_begin); |
2537 | 2559 | ||
2538 | /* | 2560 | int nobh_write_end(struct file *file, struct address_space *mapping, |
2539 | * Make sure any changes to nobh_commit_write() are reflected in | 2561 | loff_t pos, unsigned len, unsigned copied, |
2540 | * nobh_truncate_page(), since it doesn't call commit_write(). | 2562 | struct page *page, void *fsdata) |
2541 | */ | ||
2542 | int nobh_commit_write(struct file *file, struct page *page, | ||
2543 | unsigned from, unsigned to) | ||
2544 | { | 2563 | { |
2545 | struct inode *inode = page->mapping->host; | 2564 | struct inode *inode = page->mapping->host; |
2546 | loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | 2565 | struct buffer_head *head = NULL; |
2566 | struct buffer_head *bh; | ||
2547 | 2567 | ||
2548 | if (page_has_buffers(page)) | 2568 | if (!PageMappedToDisk(page)) { |
2549 | return generic_commit_write(file, page, from, to); | 2569 | if (unlikely(copied < len) && !page_has_buffers(page)) |
2570 | attach_nobh_buffers(page, head); | ||
2571 | if (page_has_buffers(page)) | ||
2572 | return generic_write_end(file, mapping, pos, len, | ||
2573 | copied, page, fsdata); | ||
2574 | } | ||
2550 | 2575 | ||
2551 | SetPageUptodate(page); | 2576 | SetPageUptodate(page); |
2552 | set_page_dirty(page); | 2577 | set_page_dirty(page); |
2553 | if (pos > inode->i_size) { | 2578 | if (pos+copied > inode->i_size) { |
2554 | i_size_write(inode, pos); | 2579 | i_size_write(inode, pos+copied); |
2555 | mark_inode_dirty(inode); | 2580 | mark_inode_dirty(inode); |
2556 | } | 2581 | } |
2557 | return 0; | 2582 | |
2583 | unlock_page(page); | ||
2584 | page_cache_release(page); | ||
2585 | |||
2586 | head = fsdata; | ||
2587 | while (head) { | ||
2588 | bh = head; | ||
2589 | head = head->b_this_page; | ||
2590 | free_buffer_head(bh); | ||
2591 | } | ||
2592 | |||
2593 | return copied; | ||
2558 | } | 2594 | } |
2559 | EXPORT_SYMBOL(nobh_commit_write); | 2595 | EXPORT_SYMBOL(nobh_write_end); |
2560 | 2596 | ||
2561 | /* | 2597 | /* |
2562 | * nobh_writepage() - based on block_full_write_page() except | 2598 | * nobh_writepage() - based on block_full_write_page() except |
@@ -2609,44 +2645,79 @@ out: | |||
2609 | } | 2645 | } |
2610 | EXPORT_SYMBOL(nobh_writepage); | 2646 | EXPORT_SYMBOL(nobh_writepage); |
2611 | 2647 | ||
2612 | /* | 2648 | int nobh_truncate_page(struct address_space *mapping, |
2613 | * This function assumes that ->prepare_write() uses nobh_prepare_write(). | 2649 | loff_t from, get_block_t *get_block) |
2614 | */ | ||
2615 | int nobh_truncate_page(struct address_space *mapping, loff_t from) | ||
2616 | { | 2650 | { |
2617 | struct inode *inode = mapping->host; | ||
2618 | unsigned blocksize = 1 << inode->i_blkbits; | ||
2619 | pgoff_t index = from >> PAGE_CACHE_SHIFT; | 2651 | pgoff_t index = from >> PAGE_CACHE_SHIFT; |
2620 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 2652 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
2621 | unsigned to; | 2653 | unsigned blocksize; |
2654 | sector_t iblock; | ||
2655 | unsigned length, pos; | ||
2656 | struct inode *inode = mapping->host; | ||
2622 | struct page *page; | 2657 | struct page *page; |
2623 | const struct address_space_operations *a_ops = mapping->a_ops; | 2658 | struct buffer_head map_bh; |
2624 | int ret = 0; | 2659 | int err; |
2625 | 2660 | ||
2626 | if ((offset & (blocksize - 1)) == 0) | 2661 | blocksize = 1 << inode->i_blkbits; |
2627 | goto out; | 2662 | length = offset & (blocksize - 1); |
2663 | |||
2664 | /* Block boundary? Nothing to do */ | ||
2665 | if (!length) | ||
2666 | return 0; | ||
2667 | |||
2668 | length = blocksize - length; | ||
2669 | iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2628 | 2670 | ||
2629 | ret = -ENOMEM; | ||
2630 | page = grab_cache_page(mapping, index); | 2671 | page = grab_cache_page(mapping, index); |
2672 | err = -ENOMEM; | ||
2631 | if (!page) | 2673 | if (!page) |
2632 | goto out; | 2674 | goto out; |
2633 | 2675 | ||
2634 | to = (offset + blocksize) & ~(blocksize - 1); | 2676 | if (page_has_buffers(page)) { |
2635 | ret = a_ops->prepare_write(NULL, page, offset, to); | 2677 | has_buffers: |
2636 | if (ret == 0) { | 2678 | unlock_page(page); |
2637 | zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, | 2679 | page_cache_release(page); |
2638 | KM_USER0); | 2680 | return block_truncate_page(mapping, from, get_block); |
2639 | /* | ||
2640 | * It would be more correct to call aops->commit_write() | ||
2641 | * here, but this is more efficient. | ||
2642 | */ | ||
2643 | SetPageUptodate(page); | ||
2644 | set_page_dirty(page); | ||
2645 | } | 2681 | } |
2682 | |||
2683 | /* Find the buffer that contains "offset" */ | ||
2684 | pos = blocksize; | ||
2685 | while (offset >= pos) { | ||
2686 | iblock++; | ||
2687 | pos += blocksize; | ||
2688 | } | ||
2689 | |||
2690 | err = get_block(inode, iblock, &map_bh, 0); | ||
2691 | if (err) | ||
2692 | goto unlock; | ||
2693 | /* unmapped? It's a hole - nothing to do */ | ||
2694 | if (!buffer_mapped(&map_bh)) | ||
2695 | goto unlock; | ||
2696 | |||
2697 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
2698 | if (!PageUptodate(page)) { | ||
2699 | err = mapping->a_ops->readpage(NULL, page); | ||
2700 | if (err) { | ||
2701 | page_cache_release(page); | ||
2702 | goto out; | ||
2703 | } | ||
2704 | lock_page(page); | ||
2705 | if (!PageUptodate(page)) { | ||
2706 | err = -EIO; | ||
2707 | goto unlock; | ||
2708 | } | ||
2709 | if (page_has_buffers(page)) | ||
2710 | goto has_buffers; | ||
2711 | } | ||
2712 | zero_user_page(page, offset, length, KM_USER0); | ||
2713 | set_page_dirty(page); | ||
2714 | err = 0; | ||
2715 | |||
2716 | unlock: | ||
2646 | unlock_page(page); | 2717 | unlock_page(page); |
2647 | page_cache_release(page); | 2718 | page_cache_release(page); |
2648 | out: | 2719 | out: |
2649 | return ret; | 2720 | return err; |
2650 | } | 2721 | } |
2651 | EXPORT_SYMBOL(nobh_truncate_page); | 2722 | EXPORT_SYMBOL(nobh_truncate_page); |
2652 | 2723 | ||
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 63ab02aa4c52..1b102a1ccebb 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping, | |||
659 | return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); | 659 | return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); |
660 | } | 660 | } |
661 | 661 | ||
662 | static int | ||
663 | ext2_nobh_write_begin(struct file *file, struct address_space *mapping, | ||
664 | loff_t pos, unsigned len, unsigned flags, | ||
665 | struct page **pagep, void **fsdata) | ||
666 | { | ||
667 | /* | ||
668 | * Dir-in-pagecache still uses ext2_write_begin. Would have to rework | ||
669 | * directory handling code to pass around offsets rather than struct | ||
670 | * pages in order to make this work easily. | ||
671 | */ | ||
672 | return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | ||
673 | ext2_get_block); | ||
674 | } | ||
675 | |||
662 | static int ext2_nobh_writepage(struct page *page, | 676 | static int ext2_nobh_writepage(struct page *page, |
663 | struct writeback_control *wbc) | 677 | struct writeback_control *wbc) |
664 | { | 678 | { |
@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = { | |||
710 | .readpages = ext2_readpages, | 724 | .readpages = ext2_readpages, |
711 | .writepage = ext2_nobh_writepage, | 725 | .writepage = ext2_nobh_writepage, |
712 | .sync_page = block_sync_page, | 726 | .sync_page = block_sync_page, |
713 | /* XXX: todo */ | 727 | .write_begin = ext2_nobh_write_begin, |
728 | .write_end = nobh_write_end, | ||
714 | .bmap = ext2_bmap, | 729 | .bmap = ext2_bmap, |
715 | .direct_IO = ext2_direct_IO, | 730 | .direct_IO = ext2_direct_IO, |
716 | .writepages = ext2_writepages, | 731 | .writepages = ext2_writepages, |
@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode) | |||
927 | if (mapping_is_xip(inode->i_mapping)) | 942 | if (mapping_is_xip(inode->i_mapping)) |
928 | xip_truncate_page(inode->i_mapping, inode->i_size); | 943 | xip_truncate_page(inode->i_mapping, inode->i_size); |
929 | else if (test_opt(inode->i_sb, NOBH)) | 944 | else if (test_opt(inode->i_sb, NOBH)) |
930 | nobh_truncate_page(inode->i_mapping, inode->i_size); | 945 | nobh_truncate_page(inode->i_mapping, |
946 | inode->i_size, ext2_get_block); | ||
931 | else | 947 | else |
932 | block_truncate_page(inode->i_mapping, | 948 | block_truncate_page(inode->i_mapping, |
933 | inode->i_size, ext2_get_block); | 949 | inode->i_size, ext2_get_block); |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6af378563611..4672013802e1 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping, | |||
279 | loff_t pos, unsigned len, unsigned flags, | 279 | loff_t pos, unsigned len, unsigned flags, |
280 | struct page **pagep, void **fsdata) | 280 | struct page **pagep, void **fsdata) |
281 | { | 281 | { |
282 | *pagep = NULL; | 282 | return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
283 | return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | ||
284 | jfs_get_block); | 283 | jfs_get_block); |
285 | } | 284 | } |
286 | 285 | ||
@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = { | |||
306 | .writepages = jfs_writepages, | 305 | .writepages = jfs_writepages, |
307 | .sync_page = block_sync_page, | 306 | .sync_page = block_sync_page, |
308 | .write_begin = jfs_write_begin, | 307 | .write_begin = jfs_write_begin, |
309 | .write_end = generic_write_end, | 308 | .write_end = nobh_write_end, |
310 | .bmap = jfs_bmap, | 309 | .bmap = jfs_bmap, |
311 | .direct_IO = jfs_direct_IO, | 310 | .direct_IO = jfs_direct_IO, |
312 | }; | 311 | }; |
@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip) | |||
359 | { | 358 | { |
360 | jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); | 359 | jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); |
361 | 360 | ||
362 | block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); | 361 | nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); |
363 | 362 | ||
364 | IWRITE_LOCK(ip, RDWRLOCK_NORMAL); | 363 | IWRITE_LOCK(ip, RDWRLOCK_NORMAL); |
365 | jfs_truncate_nolock(ip, ip->i_size); | 364 | jfs_truncate_nolock(ip, ip->i_size); |