From e422fd2c965ad1b0e4eadaabd0adb77e8a93e74e Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@suse.de>
Date: Thu, 5 May 2005 16:15:04 -0700
Subject: [PATCH] avoid -ENOMEM due reclaimable slab caches

This makes sure that reclaimable buffer headers and reclaimable inodes
are accounted properly during the overcommit checks.

Signed-off-by: Andrea Arcangeli <andrea@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 5f525b3c6d9f..6ed59497fd4d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3115,7 +3115,7 @@ void __init buffer_init(void)
 
 	bh_cachep = kmem_cache_create("buffer_head",
 			sizeof(struct buffer_head), 0,
-			SLAB_PANIC, init_buffer_head, NULL);
+			SLAB_RECLAIM_ACCOUNT|SLAB_PANIC, init_buffer_head, NULL);
 
 	/*
 	 * Limit the bh occupancy to 10% of ZONE_NORMAL
-- 
cgit v1.2.2


From f3ddbdc6267c32223035ea9bb8456a2d86f65ba1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 5 May 2005 16:15:45 -0700
Subject: [PATCH] fix race in __block_prepare_write

Fix a race where __block_prepare_write can leak out an in-flight read
against a bh if get_block returns an error.  This can lead to the page
becoming unlocked while the buffer is locked and the read still in flight.
__mpage_writepage BUGs on this condition.

BUG sighted on a 2-way Itanium2 system with 16K PAGE_SIZE running

	fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2

where $DIR is a new ext2 filesystem with 4K blocks that is quite
small (causing get_block to fail often with -ENOSPC).

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6ed59497fd4d..af7c51ded2e1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1953,7 +1953,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_mapped(bh)) {
 			err = get_block(inode, block, bh, 1);
 			if (err)
-				goto out;
+				break;
 			if (buffer_new(bh)) {
 				clear_buffer_new(bh);
 				unmap_underlying_metadata(bh->b_bdev,
@@ -1995,10 +1995,12 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 	while(wait_bh > wait) {
 		wait_on_buffer(*--wait_bh);
 		if (!buffer_uptodate(*wait_bh))
-			return -EIO;
+			err = -EIO;
 	}
-	return 0;
-out:
+	if (!err)
+		return err;
+
+	/* Error case: */
 	/*
 	 * Zero out any newly allocated blocks to avoid exposing stale
 	 * data.  If BH_New is set, we know that the block was newly
-- 
cgit v1.2.2


From ad576e63e0c8b274a8558b8e05a6d0526e804dc0 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Thu, 5 May 2005 16:15:46 -0700
Subject: [PATCH] __block_write_full_page race fix

When running
	fsstress -v -d $DIR/tmp -n 1000 -p 1000 -l 2
on an ext2 filesystem with 1024 byte block size, on SMP i386 with 4096 byte
page size over loopback to an image file on a tmpfs filesystem, I would
very quickly hit
	BUG_ON(!buffer_async_write(bh));
in fs/buffer.c:end_buffer_async_write

It seems that more than one request would be submitted for a given bh
at a time.

What would happen is the following:
2 threads doing __mpage_writepages on the same page.
Thread 1 - lock the page first, and enter __block_write_full_page.
Thread 1 - (eg.) mark_buffer_async_write on the first 2 buffers.
Thread 1 - set page writeback, unlock page.
Thread 2 - lock page, wait on page writeback
Thread 1 - submit_bh on the first 2 buffers.
=> both requests complete, none of the page buffers are async_write,
   end_page_writeback is called.
Thread 2 - wakes up. enters __block_write_full_page.
Thread 2 - mark_buffer_async_write on (eg.) the last buffer
Thread 1 - finds the last buffer has async_write set, submit_bh on that.
Thread 2 - submit_bh on the last buffer.
=> oops.

So change __block_write_full_page to explicitly keep track of the last bh
we need to issue, so we don't touch anything after issuing the last
request.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index af7c51ded2e1..bc75f2e7b274 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head;
+	struct buffer_head *bh, *head, *last_bh = NULL;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1809,7 +1809,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	} while (bh != head);
 
 	do {
-		get_bh(bh);
 		if (!buffer_mapped(bh))
 			continue;
 		/*
@@ -1827,6 +1826,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1845,10 +1846,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;
 
 	err = 0;
 done:
@@ -1887,10 +1891,11 @@ recover:
 	bh = head;
 	/* Recovery: lock and submit the mapped buffers */
 	do {
-		get_bh(bh);
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
+			get_bh(bh);
+			last_bh = bh;
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1909,10 +1914,13 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
+			put_bh(bh);
+			if (bh == last_bh)
+				break;
 		}
-		put_bh(bh);
 		bh = next;
 	} while (bh != head);
+	bh = head;
 	goto done;
 }
 
-- 
cgit v1.2.2


From 05937baae9fc27b64bcd4378da7d2b14edf7931c Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 5 May 2005 16:15:47 -0700
Subject: [PATCH] __block_write_full_page speedup

Remove all those get_bh()'s and put_bh()'s by extending lock_page() to cover
the troublesome regions.

(get_bh() and put_bh() happen every time whereas contention on a page's lock
in there happens basically never).

Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index bc75f2e7b274..6f2c3303a443 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
-			get_bh(bh);
 			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
@@ -1839,20 +1838,19 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 */
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
-	unlock_page(page);
 
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			put_bh(bh);
 			if (bh == last_bh)
 				break;
 		}
 		bh = next;
 	} while (bh != head);
 	bh = head;
+	unlock_page(page);
 
 	err = 0;
 done:
@@ -1894,7 +1892,6 @@ recover:
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
-			get_bh(bh);
 			last_bh = bh;
 		} else {
 			/*
@@ -1914,7 +1911,6 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			put_bh(bh);
 			if (bh == last_bh)
 				break;
 		}
-- 
cgit v1.2.2


From f0fbd5fc09b20f7ba7bc8c80be33e39925bb38e1 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Thu, 5 May 2005 16:15:48 -0700
Subject: [PATCH] __block_write_full_page() simplification

The `last_bh' logic probably isn't worth much.  In those situations where only
the front part of the page is being written out we will save some looping but
in the vastly more common case of an all-page writeout if just adds more code.

Nick Piggin <nickpiggin@yahoo.com.au>

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6f2c3303a443..91ace8034bf7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1751,7 +1751,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	int err;
 	sector_t block;
 	sector_t last_block;
-	struct buffer_head *bh, *head, *last_bh = NULL;
+	struct buffer_head *bh, *head;
 	int nr_underway = 0;
 
 	BUG_ON(!PageLocked(page));
@@ -1826,7 +1826,6 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		}
 		if (test_clear_buffer_dirty(bh)) {
 			mark_buffer_async_write(bh);
-			last_bh = bh;
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1844,12 +1843,9 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		if (buffer_async_write(bh)) {
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			if (bh == last_bh)
-				break;
 		}
 		bh = next;
 	} while (bh != head);
-	bh = head;
 	unlock_page(page);
 
 	err = 0;
@@ -1892,7 +1888,6 @@ recover:
 		if (buffer_mapped(bh) && buffer_dirty(bh)) {
 			lock_buffer(bh);
 			mark_buffer_async_write(bh);
-			last_bh = bh;
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1911,12 +1906,9 @@ recover:
 			clear_buffer_dirty(bh);
 			submit_bh(WRITE, bh);
 			nr_underway++;
-			if (bh == last_bh)
-				break;
 		}
 		bh = next;
 	} while (bh != head);
-	bh = head;
 	goto done;
 }
 
-- 
cgit v1.2.2


From 75c96f85845a6707b0f9916cb263cb3584f7d48f Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Thu, 5 May 2005 16:16:09 -0700
Subject: [PATCH] make some things static

This patch makes some needlessly global identifiers static.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Arjan van de Ven <arjanv@infradead.org>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 91ace8034bf7..6f88dcc6d002 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1210,7 +1210,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
 	return 1;
 }
 
-struct buffer_head *
+static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
-- 
cgit v1.2.2


From c64610ba585fabb36be78782868277f3d9741a2e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Mon, 16 May 2005 21:53:49 -0700
Subject: [PATCH] block_read_full_page() get_block() error handling fix

If block_read_full_page() detects an error when running get_block() it will
run SetPageError(), then it will zero out the block in pagecache and will mark
the buffer_head uptodate.

So at the end of readahead we end up with a non-uptodate pagecache page which
is marked PageError.  But it has uptodate buffers.

The pagefault code will run ClearPageError, will launch readpage a second time
and block_read_full_page() will notice the uptodate buffers and will mark the
page uptodate as well.  We end up with an uptodate, !PageError page full of
zeros and the error is lost.

(It seems a little odd that filemap_nopage() runs ClearPageError().  I guess
all of this adds up to meaning that for each attempted access to the page, the
pagefault handler will retry the I/O.  Which is good and bad.  If the app is
ignoring SIGBUS for some reason we could get a lot of back-to-back I/O
errors.)

Fix it by not marking the pagecache buffer_head as uptodate if the attempt to
map that buffer to a disk block failed.

Credit-to: Qu Fuping <fs@ercist.iscas.ac.cn>

  For reporting the bug and identifying its source.

Signed-off-by: Qu Fuping <fs@ercist.iscas.ac.cn>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs/buffer.c')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6f88dcc6d002..7e9e409feaa7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2094,9 +2094,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 			continue;
 
 		if (!buffer_mapped(bh)) {
+			int err = 0;
+
 			fully_mapped = 0;
 			if (iblock < lblock) {
-				if (get_block(inode, iblock, bh, 0))
+				err = get_block(inode, iblock, bh, 0);
+				if (err)
 					SetPageError(page);
 			}
 			if (!buffer_mapped(bh)) {
@@ -2104,7 +2107,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 				memset(kaddr + i * blocksize, 0, blocksize);
 				flush_dcache_page(page);
 				kunmap_atomic(kaddr, KM_USER0);
-				set_buffer_uptodate(bh);
+				if (!err)
+					set_buffer_uptodate(bh);
 				continue;
 			}
 			/*
-- 
cgit v1.2.2