From e422fd2c965ad1b0e4eadaabd0adb77e8a93e74e Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 5 May 2005 16:15:04 -0700 Subject: [PATCH] avoid -ENOMEM due reclaimable slab caches This makes sure that reclaimable buffer headers and reclaimable inodes are accounted properly during the overcommit checks. Signed-off-by: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 5f525b3c6d9f..6ed59497fd4d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3115,7 +3115,7 @@ void __init buffer_init(void) bh_cachep = kmem_cache_create("buffer_head", sizeof(struct buffer_head), 0, - SLAB_PANIC, init_buffer_head, NULL); + SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL); /* * Limit the bh occupancy to 10% of ZONE_NORMAL -- cgit v1.2.2 From f3ddbdc6267c32223035ea9bb8456a2d86f65ba1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 5 May 2005 16:15:45 -0700 Subject: [PATCH] fix race in __block_prepare_write Fix a race where __block_prepare_write can leak out an in-flight read against a bh if get_block returns an error. This can lead to the page becoming unlocked while the buffer is locked and the read still in flight. __mpage_writepage BUGs on this condition. BUG sighted on a 2-way Itanium2 system with 16K PAGE_SIZE running fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2 where $DIR is a new ext2 filesystem with 4K blocks that is quite small (causing get_block to fail often with -ENOSPC). Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6ed59497fd4d..af7c51ded2e1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1953,7 +1953,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_mapped(bh)) { err = get_block(inode, block, bh, 1); if (err) - goto out; + break; if (buffer_new(bh)) { clear_buffer_new(bh); unmap_underlying_metadata(bh->b_bdev, @@ -1995,10 +1995,12 @@ static int __block_prepare_write(struct inode *inode, struct page *page, while(wait_bh > wait) { wait_on_buffer(*--wait_bh); if (!buffer_uptodate(*wait_bh)) - return -EIO; + err = -EIO; } - return 0; -out: + if (!err) + return err; + + /* Error case: */ /* * Zero out any newly allocated blocks to avoid exposing stale * data. If BH_New is set, we know that the block was newly -- cgit v1.2.2 From ad576e63e0c8b274a8558b8e05a6d0526e804dc0 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 5 May 2005 16:15:46 -0700 Subject: [PATCH] __block_write_full_page race fix When running fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2 on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte page size over loopback to an image file on a tmpfs filesystem, I would very quickly hit BUG_ON(!buffer_async_write(bh)); in fs/buffer.c:end_buffer_async_write It seems that more than one request would be submitted for a given bh at a time. What would happen is the following: 2 threads doing __mpage_writepages on the same page. Thread 1 - lock the page first, and enter __block_write_full_page. Thread 1 - (eg.) mark_buffer_async_write on the first 2 buffers. Thread 1 - set page writeback, unlock page. Thread 2 - lock page, wait on page writeback Thread 1 - submit_bh on the first 2 buffers. => both requests complete, none of the page buffers are async_write, end_page_writeback is called. Thread 2 - wakes up. enters __block_write_full_page. Thread 2 - mark_buffer_async_write on (eg.) the last buffer Thread 1 - finds the last buffer has async_write set, submit_bh on that. Thread 2 - submit_bh on the last buffer. => oops. So change __block_write_full_page to explicitly keep track of the last bh we need to issue, so we don't touch anything after issuing the last request. Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index af7c51ded2e1..bc75f2e7b274 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, int err; sector_t block; sector_t last_block; - struct buffer_head *bh, *head; + struct buffer_head *bh, *head, *last_bh = NULL; int nr_underway = 0; BUG_ON(!PageLocked(page)); @@ -1809,7 +1809,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page, } while (bh != head); do { - get_bh(bh); if (!buffer_mapped(bh)) continue; /* @@ -1827,6 +1826,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page, } if (test_clear_buffer_dirty(bh)) { mark_buffer_async_write(bh); + get_bh(bh); + last_bh = bh; } else { unlock_buffer(bh); } @@ -1845,10 +1846,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, if (buffer_async_write(bh)) { submit_bh(WRITE, bh); nr_underway++; + put_bh(bh); + if (bh == last_bh) + break; } - put_bh(bh); bh = next; } while (bh != head); + bh = head; err = 0; done: @@ -1887,10 +1891,11 @@ recover: bh = head; /* Recovery: lock and submit the mapped buffers */ do { - get_bh(bh); if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); mark_buffer_async_write(bh); + get_bh(bh); + last_bh = bh; } else { /* * The buffer may have been set dirty during @@ -1909,10 +1914,13 @@ recover: clear_buffer_dirty(bh); submit_bh(WRITE, bh); nr_underway++; + put_bh(bh); + if (bh == last_bh) + break; } - put_bh(bh); bh = next; } while (bh != head); + bh = head; goto done; } -- cgit v1.2.2 From 05937baae9fc27b64bcd4378da7d2b14edf7931c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 May 2005 16:15:47 -0700 Subject: [PATCH] __block_write_full_page speedup Remove all those get_bh()'s and put_bh()'s by extending lock_page() to cover the troublesome regions. (get_bh() and put_bh() happen every time whereas contention on a page's lock in there happens basically never). Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index bc75f2e7b274..6f2c3303a443 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page, } if (test_clear_buffer_dirty(bh)) { mark_buffer_async_write(bh); - get_bh(bh); last_bh = bh; } else { unlock_buffer(bh); @@ -1839,20 +1838,19 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ BUG_ON(PageWriteback(page)); set_page_writeback(page); - unlock_page(page); do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { submit_bh(WRITE, bh); nr_underway++; - put_bh(bh); if (bh == last_bh) break; } bh = next; } while (bh != head); bh = head; + unlock_page(page); err = 0; done: @@ -1894,7 +1892,6 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); mark_buffer_async_write(bh); - get_bh(bh); last_bh = bh; } else { /* @@ -1914,7 +1911,6 @@ recover: clear_buffer_dirty(bh); submit_bh(WRITE, bh); nr_underway++; - put_bh(bh); if (bh == last_bh) break; } -- cgit v1.2.2 From f0fbd5fc09b20f7ba7bc8c80be33e39925bb38e1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 May 2005 16:15:48 -0700 Subject: [PATCH] __block_write_full_page() simplification The `last_bh' logic probably isn't worth much. In those situations where only the front part of the page is being written out we will save some looping but in the vastly more common case of an all-page writeout if just adds more code. Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6f2c3303a443..91ace8034bf7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, int err; sector_t block; sector_t last_block; - struct buffer_head *bh, *head, *last_bh = NULL; + struct buffer_head *bh, *head; int nr_underway = 0; BUG_ON(!PageLocked(page)); @@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page, } if (test_clear_buffer_dirty(bh)) { mark_buffer_async_write(bh); - last_bh = bh; } else { unlock_buffer(bh); } @@ -1844,12 +1843,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page, if (buffer_async_write(bh)) { submit_bh(WRITE, bh); nr_underway++; - if (bh == last_bh) - break; } bh = next; } while (bh != head); - bh = head; unlock_page(page); err = 0; @@ -1892,7 +1888,6 @@ recover: if (buffer_mapped(bh) && buffer_dirty(bh)) { lock_buffer(bh); mark_buffer_async_write(bh); - last_bh = bh; } else { /* * The buffer may have been set dirty during @@ -1911,12 +1906,9 @@ recover: clear_buffer_dirty(bh); submit_bh(WRITE, bh); nr_underway++; - if (bh == last_bh) - break; } bh = next; } while (bh != head); - bh = head; goto done; } -- cgit v1.2.2 From 75c96f85845a6707b0f9916cb263cb3584f7d48f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 5 May 2005 16:16:09 -0700 Subject: [PATCH] make some things static This patch makes some needlessly global identifiers static. Signed-off-by: Adrian Bunk Acked-by: Arjan van de Ven Acked-by: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 91ace8034bf7..6f88dcc6d002 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1210,7 +1210,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) return 1; } -struct buffer_head * +static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { /* Size must be multiple of hard sectorsize */ -- cgit v1.2.2 From c64610ba585fabb36be78782868277f3d9741a2e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 16 May 2005 21:53:49 -0700 Subject: [PATCH] block_read_full_page() get_block() error handling fix If block_read_full_page() detects an error when running get_block() it will run SetPageError(), then it will zero out the block in pagecache and will mark the buffer_head uptodate. So at the end of readahead we end up with a non-uptodate pagecache page which is marked PageError. But it has uptodate buffers. The pagefault code will run ClearPageError, will launch readpage a second time and block_read_full_page() will notice the uptodate buffers and will mark the page uptodate as well. We end up with an uptodate, !PageError page full of zeros and the error is lost. (It seems a little odd that filemap_nopage() runs ClearPageError(). I guess all of this adds up to meaning that for each attempted access to the page, the pagefault handler will retry the I/O. Which is good and bad. If the app is ignoring SIGBUS for some reason we could get a lot of back-to-back I/O errors.) Fix it by not marking the pagecache buffer_head as uptodate if the attempt to map that buffer to a disk block failed. Credit-to: Qu Fuping For reporting the bug and identifying its source. Signed-off-by: Qu Fuping Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 6f88dcc6d002..7e9e409feaa7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2094,9 +2094,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block) continue; if (!buffer_mapped(bh)) { + int err = 0; + fully_mapped = 0; if (iblock < lblock) { - if (get_block(inode, iblock, bh, 0)) + err = get_block(inode, iblock, bh, 0); + if (err) SetPageError(page); } if (!buffer_mapped(bh)) { @@ -2104,7 +2107,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block) memset(kaddr + i * blocksize, 0, blocksize); flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - set_buffer_uptodate(bh); + if (!err) + set_buffer_uptodate(bh); continue; } /* -- cgit v1.2.2