aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Nick Piggin <npiggin@suse.de> 2007-10-16 04:25:25 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-16 12:42:58 -0400
commit: 03158cd7eb3374843de68421142ca5900df845d9 (patch)
tree: 5e1bfc1f981651e7f7d7cf30afd15d7bd96b2a57 /fs
parent: b6af1bcd8720cb3062c8c4d4c8ba02bee10ff03f (diff)
fs: restore nobh
Implement nobh in new aops. This is a bit tricky. FWIW, nobh_truncate is now implemented in a way that does not create blocks in sparse regions, which is a silly thing for it to have been doing (isn't it?).

ext2 survives fsx and fsstress. jfs is converted as well... ext3 should be easy to do (but not done yet).

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/buffer.c 229
-rw-r--r-- fs/ext2/inode.c 20
-rw-r--r-- fs/jfs/inode.c 7
3 files changed, 171 insertions, 85 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index a89d25bf1171..a406cfd89e3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2369,7 +2369,7 @@ out_unlock:
2369} 2369}
2370 2370
2371/* 2371/*
2372 * nobh_prepare_write()'s prereads are special: the buffer_heads are freed 2372 * nobh_write_begin()'s prereads are special: the buffer_heads are freed
2373 * immediately, while under the page lock. So it needs a special end_io 2373 * immediately, while under the page lock. So it needs a special end_io
2374 * handler which does not touch the bh after unlocking it. 2374 * handler which does not touch the bh after unlocking it.
2375 */ 2375 */
@@ -2379,16 +2379,45 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
2379} 2379}
2380 2380
2381/* 2381/*
2382 * Attach the singly-linked list of buffers created by nobh_write_begin, to
2383 * the page (converting it to circular linked list and taking care of page
2384 * dirty races).
2385 */
2386static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
2387{
2388 struct buffer_head *bh;
2389
2390 BUG_ON(!PageLocked(page));
2391
2392 spin_lock(&page->mapping->private_lock);
2393 bh = head;
2394 do {
2395 if (PageDirty(page))
2396 set_buffer_dirty(bh);
2397 if (!bh->b_this_page)
2398 bh->b_this_page = head;
2399 bh = bh->b_this_page;
2400 } while (bh != head);
2401 attach_page_buffers(page, head);
2402 spin_unlock(&page->mapping->private_lock);
2403}
2404
2405/*
2382 * On entry, the page is fully not uptodate. 2406 * On entry, the page is fully not uptodate.
2383 * On exit the page is fully uptodate in the areas outside (from,to) 2407 * On exit the page is fully uptodate in the areas outside (from,to)
2384 */ 2408 */
2385int nobh_prepare_write(struct page *page, unsigned from, unsigned to, 2409int nobh_write_begin(struct file *file, struct address_space *mapping,
2410 loff_t pos, unsigned len, unsigned flags,
2411 struct page **pagep, void **fsdata,
2386 get_block_t *get_block) 2412 get_block_t *get_block)
2387{ 2413{
2388 struct inode *inode = page->mapping->host; 2414 struct inode *inode = mapping->host;
2389 const unsigned blkbits = inode->i_blkbits; 2415 const unsigned blkbits = inode->i_blkbits;
2390 const unsigned blocksize = 1 << blkbits; 2416 const unsigned blocksize = 1 << blkbits;
2391 struct buffer_head *head, *bh; 2417 struct buffer_head *head, *bh;
2418 struct page *page;
2419 pgoff_t index;
2420 unsigned from, to;
2392 unsigned block_in_page; 2421 unsigned block_in_page;
2393 unsigned block_start, block_end; 2422 unsigned block_start, block_end;
2394 sector_t block_in_file; 2423 sector_t block_in_file;
@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2397 int ret = 0; 2426 int ret = 0;
2398 int is_mapped_to_disk = 1; 2427 int is_mapped_to_disk = 1;
2399 2428
2400 if (page_has_buffers(page)) 2429 index = pos >> PAGE_CACHE_SHIFT;
2401 return block_prepare_write(page, from, to, get_block); 2430 from = pos & (PAGE_CACHE_SIZE - 1);
2431 to = from + len;
2432
2433 page = __grab_cache_page(mapping, index);
2434 if (!page)
2435 return -ENOMEM;
2436 *pagep = page;
2437 *fsdata = NULL;
2438
2439 if (page_has_buffers(page)) {
2440 unlock_page(page);
2441 page_cache_release(page);
2442 *pagep = NULL;
2443 return block_write_begin(file, mapping, pos, len, flags, pagep,
2444 fsdata, get_block);
2445 }
2402 2446
2403 if (PageMappedToDisk(page)) 2447 if (PageMappedToDisk(page))
2404 return 0; 2448 return 0;
@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2413 * than the circular one we're used to. 2457 * than the circular one we're used to.
2414 */ 2458 */
2415 head = alloc_page_buffers(page, blocksize, 0); 2459 head = alloc_page_buffers(page, blocksize, 0);
2416 if (!head) 2460 if (!head) {
2417 return -ENOMEM; 2461 ret = -ENOMEM;
2462 goto out_release;
2463 }
2418 2464
2419 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); 2465 block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
2420 2466
@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
2483 if (is_mapped_to_disk) 2529 if (is_mapped_to_disk)
2484 SetPageMappedToDisk(page); 2530 SetPageMappedToDisk(page);
2485 2531
2486 do { 2532 *fsdata = head; /* to be released by nobh_write_end */
2487 bh = head;
2488 head = head->b_this_page;
2489 free_buffer_head(bh);
2490 } while (head);
2491 2533
2492 return 0; 2534 return 0;
2493 2535
2494failed: 2536failed:
2537 BUG_ON(!ret);
2495 /* 2538 /*
2496 * Error recovery is a bit difficult. We need to zero out blocks that 2539 * Error recovery is a bit difficult. We need to zero out blocks that
2497 * were newly allocated, and dirty them to ensure they get written out. 2540 * were newly allocated, and dirty them to ensure they get written out.
@@ -2499,64 +2542,57 @@ failed:
2499 * the handling of potential IO errors during writeout would be hard 2542 * the handling of potential IO errors during writeout would be hard
2500 * (could try doing synchronous writeout, but what if that fails too?) 2543 * (could try doing synchronous writeout, but what if that fails too?)
2501 */ 2544 */
2502 spin_lock(&page->mapping->private_lock); 2545 attach_nobh_buffers(page, head);
2503 bh = head; 2546 page_zero_new_buffers(page, from, to);
2504 block_start = 0;
2505 do {
2506 if (PageUptodate(page))
2507 set_buffer_uptodate(bh);
2508 if (PageDirty(page))
2509 set_buffer_dirty(bh);
2510 2547
2511 block_end = block_start+blocksize; 2548out_release:
2512 if (block_end <= from) 2549 unlock_page(page);
2513 goto next; 2550 page_cache_release(page);
2514 if (block_start >= to) 2551 *pagep = NULL;
2515 goto next;
2516 2552
2517 if (buffer_new(bh)) { 2553 if (pos + len > inode->i_size)
2518 clear_buffer_new(bh); 2554 vmtruncate(inode, inode->i_size);
2519 if (!buffer_uptodate(bh)) {
2520 zero_user_page(page, block_start, bh->b_size, KM_USER0);
2521 set_buffer_uptodate(bh);
2522 }
2523 mark_buffer_dirty(bh);
2524 }
2525next:
2526 block_start = block_end;
2527 if (!bh->b_this_page)
2528 bh->b_this_page = head;
2529 bh = bh->b_this_page;
2530 } while (bh != head);
2531 attach_page_buffers(page, head);
2532 spin_unlock(&page->mapping->private_lock);
2533 2555
2534 return ret; 2556 return ret;
2535} 2557}
2536EXPORT_SYMBOL(nobh_prepare_write); 2558EXPORT_SYMBOL(nobh_write_begin);
2537 2559
2538/* 2560int nobh_write_end(struct file *file, struct address_space *mapping,
2539 * Make sure any changes to nobh_commit_write() are reflected in 2561 loff_t pos, unsigned len, unsigned copied,
2540 * nobh_truncate_page(), since it doesn't call commit_write(). 2562 struct page *page, void *fsdata)
2541 */
2542int nobh_commit_write(struct file *file, struct page *page,
2543 unsigned from, unsigned to)
2544{ 2563{
2545 struct inode *inode = page->mapping->host; 2564 struct inode *inode = page->mapping->host;
2546 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; 2565 struct buffer_head *head = NULL;
2566 struct buffer_head *bh;
2547 2567
2548 if (page_has_buffers(page)) 2568 if (!PageMappedToDisk(page)) {
2549 return generic_commit_write(file, page, from, to); 2569 if (unlikely(copied < len) && !page_has_buffers(page))
2570 attach_nobh_buffers(page, head);
2571 if (page_has_buffers(page))
2572 return generic_write_end(file, mapping, pos, len,
2573 copied, page, fsdata);
2574 }
2550 2575
2551 SetPageUptodate(page); 2576 SetPageUptodate(page);
2552 set_page_dirty(page); 2577 set_page_dirty(page);
2553 if (pos > inode->i_size) { 2578 if (pos+copied > inode->i_size) {
2554 i_size_write(inode, pos); 2579 i_size_write(inode, pos+copied);
2555 mark_inode_dirty(inode); 2580 mark_inode_dirty(inode);
2556 } 2581 }
2557 return 0; 2582
2583 unlock_page(page);
2584 page_cache_release(page);
2585
2586 head = fsdata;
2587 while (head) {
2588 bh = head;
2589 head = head->b_this_page;
2590 free_buffer_head(bh);
2591 }
2592
2593 return copied;
2558} 2594}
2559EXPORT_SYMBOL(nobh_commit_write); 2595EXPORT_SYMBOL(nobh_write_end);
2560 2596
2561/* 2597/*
2562 * nobh_writepage() - based on block_full_write_page() except 2598 * nobh_writepage() - based on block_full_write_page() except
@@ -2609,44 +2645,79 @@ out:
2609} 2645}
2610EXPORT_SYMBOL(nobh_writepage); 2646EXPORT_SYMBOL(nobh_writepage);
2611 2647
2612/* 2648int nobh_truncate_page(struct address_space *mapping,
2613 * This function assumes that ->prepare_write() uses nobh_prepare_write(). 2649 loff_t from, get_block_t *get_block)
2614 */
2615int nobh_truncate_page(struct address_space *mapping, loff_t from)
2616{ 2650{
2617 struct inode *inode = mapping->host;
2618 unsigned blocksize = 1 << inode->i_blkbits;
2619 pgoff_t index = from >> PAGE_CACHE_SHIFT; 2651 pgoff_t index = from >> PAGE_CACHE_SHIFT;
2620 unsigned offset = from & (PAGE_CACHE_SIZE-1); 2652 unsigned offset = from & (PAGE_CACHE_SIZE-1);
2621 unsigned to; 2653 unsigned blocksize;
2654 sector_t iblock;
2655 unsigned length, pos;
2656 struct inode *inode = mapping->host;
2622 struct page *page; 2657 struct page *page;
2623 const struct address_space_operations *a_ops = mapping->a_ops; 2658 struct buffer_head map_bh;
2624 int ret = 0; 2659 int err;
2625 2660
2626 if ((offset & (blocksize - 1)) == 0) 2661 blocksize = 1 << inode->i_blkbits;
2627 goto out; 2662 length = offset & (blocksize - 1);
2663
2664 /* Block boundary? Nothing to do */
2665 if (!length)
2666 return 0;
2667
2668 length = blocksize - length;
2669 iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
2628 2670
2629 ret = -ENOMEM;
2630 page = grab_cache_page(mapping, index); 2671 page = grab_cache_page(mapping, index);
2672 err = -ENOMEM;
2631 if (!page) 2673 if (!page)
2632 goto out; 2674 goto out;
2633 2675
2634 to = (offset + blocksize) & ~(blocksize - 1); 2676 if (page_has_buffers(page)) {
2635 ret = a_ops->prepare_write(NULL, page, offset, to); 2677has_buffers:
2636 if (ret == 0) { 2678 unlock_page(page);
2637 zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, 2679 page_cache_release(page);
2638 KM_USER0); 2680 return block_truncate_page(mapping, from, get_block);
2639 /*
2640 * It would be more correct to call aops->commit_write()
2641 * here, but this is more efficient.
2642 */
2643 SetPageUptodate(page);
2644 set_page_dirty(page);
2645 } 2681 }
2682
2683 /* Find the buffer that contains "offset" */
2684 pos = blocksize;
2685 while (offset >= pos) {
2686 iblock++;
2687 pos += blocksize;
2688 }
2689
2690 err = get_block(inode, iblock, &map_bh, 0);
2691 if (err)
2692 goto unlock;
2693 /* unmapped? It's a hole - nothing to do */
2694 if (!buffer_mapped(&map_bh))
2695 goto unlock;
2696
2697 /* Ok, it's mapped. Make sure it's up-to-date */
2698 if (!PageUptodate(page)) {
2699 err = mapping->a_ops->readpage(NULL, page);
2700 if (err) {
2701 page_cache_release(page);
2702 goto out;
2703 }
2704 lock_page(page);
2705 if (!PageUptodate(page)) {
2706 err = -EIO;
2707 goto unlock;
2708 }
2709 if (page_has_buffers(page))
2710 goto has_buffers;
2711 }
2712 zero_user_page(page, offset, length, KM_USER0);
2713 set_page_dirty(page);
2714 err = 0;
2715
2716unlock:
2646 unlock_page(page); 2717 unlock_page(page);
2647 page_cache_release(page); 2718 page_cache_release(page);
2648out: 2719out:
2649 return ret; 2720 return err;
2650} 2721}
2651EXPORT_SYMBOL(nobh_truncate_page); 2722EXPORT_SYMBOL(nobh_truncate_page);
2652 2723
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 63ab02aa4c52..1b102a1ccebb 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
659 return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata); 659 return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
660} 660}
661 661
662static int
663ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
664 loff_t pos, unsigned len, unsigned flags,
665 struct page **pagep, void **fsdata)
666{
667 /*
668 * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
669 * directory handling code to pass around offsets rather than struct
670 * pages in order to make this work easily.
671 */
672 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
673 ext2_get_block);
674}
675
662static int ext2_nobh_writepage(struct page *page, 676static int ext2_nobh_writepage(struct page *page,
663 struct writeback_control *wbc) 677 struct writeback_control *wbc)
664{ 678{
@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
710 .readpages = ext2_readpages, 724 .readpages = ext2_readpages,
711 .writepage = ext2_nobh_writepage, 725 .writepage = ext2_nobh_writepage,
712 .sync_page = block_sync_page, 726 .sync_page = block_sync_page,
713 /* XXX: todo */ 727 .write_begin = ext2_nobh_write_begin,
728 .write_end = nobh_write_end,
714 .bmap = ext2_bmap, 729 .bmap = ext2_bmap,
715 .direct_IO = ext2_direct_IO, 730 .direct_IO = ext2_direct_IO,
716 .writepages = ext2_writepages, 731 .writepages = ext2_writepages,
@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode)
927 if (mapping_is_xip(inode->i_mapping)) 942 if (mapping_is_xip(inode->i_mapping))
928 xip_truncate_page(inode->i_mapping, inode->i_size); 943 xip_truncate_page(inode->i_mapping, inode->i_size);
929 else if (test_opt(inode->i_sb, NOBH)) 944 else if (test_opt(inode->i_sb, NOBH))
930 nobh_truncate_page(inode->i_mapping, inode->i_size); 945 nobh_truncate_page(inode->i_mapping,
946 inode->i_size, ext2_get_block);
931 else 947 else
932 block_truncate_page(inode->i_mapping, 948 block_truncate_page(inode->i_mapping,
933 inode->i_size, ext2_get_block); 949 inode->i_size, ext2_get_block);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6af378563611..4672013802e1 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
279 loff_t pos, unsigned len, unsigned flags, 279 loff_t pos, unsigned len, unsigned flags,
280 struct page **pagep, void **fsdata) 280 struct page **pagep, void **fsdata)
281{ 281{
282 *pagep = NULL; 282 return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
283 return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
284 jfs_get_block); 283 jfs_get_block);
285} 284}
286 285
@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = {
306 .writepages = jfs_writepages, 305 .writepages = jfs_writepages,
307 .sync_page = block_sync_page, 306 .sync_page = block_sync_page,
308 .write_begin = jfs_write_begin, 307 .write_begin = jfs_write_begin,
309 .write_end = generic_write_end, 308 .write_end = nobh_write_end,
310 .bmap = jfs_bmap, 309 .bmap = jfs_bmap,
311 .direct_IO = jfs_direct_IO, 310 .direct_IO = jfs_direct_IO,
312}; 311};
@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip)
359{ 358{
360 jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size); 359 jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
361 360
362 block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block); 361 nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
363 362
364 IWRITE_LOCK(ip, RDWRLOCK_NORMAL); 363 IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
365 jfs_truncate_nolock(ip, ip->i_size); 364 jfs_truncate_nolock(ip, ip->i_size);