Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 138
1 file changed, 82 insertions, 56 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index b144fc367b8b..09bb80c479d8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2274,51 +2274,64 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocksize = 1 << blkbits;
-	struct buffer_head map_bh;
-	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+	struct buffer_head *head, *bh;
 	unsigned block_in_page;
-	unsigned block_start;
+	unsigned block_start, block_end;
 	sector_t block_in_file;
 	char *kaddr;
 	int nr_reads = 0;
-	int i;
 	int ret = 0;
 	int is_mapped_to_disk = 1;
 
+	if (page_has_buffers(page))
+		return block_prepare_write(page, from, to, get_block);
+
 	if (PageMappedToDisk(page))
 		return 0;
 
+	/*
+	 * Allocate buffers so that we can keep track of state, and potentially
+	 * attach them to the page if an error occurs. In the common case of
+	 * no error, they will just be freed again without ever being attached
+	 * to the page (which is all OK, because we're under the page lock).
+	 *
+	 * Be careful: the buffer linked list is a NULL terminated one, rather
+	 * than the circular one we're used to.
+	 */
+	head = alloc_page_buffers(page, blocksize, 0);
+	if (!head)
+		return -ENOMEM;
+
 	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
-	map_bh.b_page = page;
 
 	/*
 	 * We loop across all blocks in the page, whether or not they are
 	 * part of the affected region.  This is so we can discover if the
 	 * page is fully mapped-to-disk.
 	 */
-	for (block_start = 0, block_in_page = 0;
+	for (block_start = 0, block_in_page = 0, bh = head;
 		  block_start < PAGE_CACHE_SIZE;
-		  block_in_page++, block_start += blocksize) {
-		unsigned block_end = block_start + blocksize;
+		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
 		int create;
 
-		map_bh.b_state = 0;
+		block_end = block_start + blocksize;
+		bh->b_state = 0;
 		create = 1;
 		if (block_start >= to)
 			create = 0;
-		map_bh.b_size = blocksize;
 		ret = get_block(inode, block_in_file + block_in_page,
-					&map_bh, create);
+					bh, create);
 		if (ret)
 			goto failed;
-		if (!buffer_mapped(&map_bh))
+		if (!buffer_mapped(bh))
 			is_mapped_to_disk = 0;
-		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-						map_bh.b_blocknr);
-		if (PageUptodate(page))
+		if (buffer_new(bh))
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+		if (PageUptodate(page)) {
+			set_buffer_uptodate(bh);
 			continue;
-		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+		}
+		if (buffer_new(bh) || !buffer_mapped(bh)) {
 			kaddr = kmap_atomic(page, KM_USER0);
 			if (block_start < from)
 				memset(kaddr+block_start, 0, from-block_start);
@@ -2328,49 +2341,26 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 			kunmap_atomic(kaddr, KM_USER0);
 			continue;
 		}
-		if (buffer_uptodate(&map_bh))
+		if (buffer_uptodate(bh))
 			continue;	/* reiserfs does this */
 		if (block_start < from || block_end > to) {
-			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
-
-			if (!bh) {
-				ret = -ENOMEM;
-				goto failed;
-			}
-			bh->b_state = map_bh.b_state;
-			atomic_set(&bh->b_count, 0);
-			bh->b_this_page = NULL;
-			bh->b_page = page;
-			bh->b_blocknr = map_bh.b_blocknr;
-			bh->b_size = blocksize;
-			bh->b_data = (char *)(long)block_start;
-			bh->b_bdev = map_bh.b_bdev;
-			bh->b_private = NULL;
-			read_bh[nr_reads++] = bh;
+			lock_buffer(bh);
+			bh->b_end_io = end_buffer_read_nobh;
+			submit_bh(READ, bh);
+			nr_reads++;
 		}
 	}
 
 	if (nr_reads) {
-		struct buffer_head *bh;
-
 		/*
 		 * The page is locked, so these buffers are protected from
 		 * any VM or truncate activity.  Hence we don't need to care
 		 * for the buffer_head refcounts.
 		 */
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
-			lock_buffer(bh);
-			bh->b_end_io = end_buffer_read_nobh;
-			submit_bh(READ, bh);
-		}
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
+		for (bh = head; bh; bh = bh->b_this_page) {
 			wait_on_buffer(bh);
 			if (!buffer_uptodate(bh))
 				ret = -EIO;
-			free_buffer_head(bh);
-			read_bh[i] = NULL;
 		}
 		if (ret)
 			goto failed;
@@ -2379,21 +2369,54 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 	if (is_mapped_to_disk)
 		SetPageMappedToDisk(page);
 
+	do {
+		bh = head;
+		head = head->b_this_page;
+		free_buffer_head(bh);
+	} while (head);
+
 	return 0;
 
 failed:
-	for (i = 0; i < nr_reads; i++) {
-		if (read_bh[i])
-			free_buffer_head(read_bh[i]);
-	}
-
 	/*
-	 * Error recovery is pretty slack.  Clear the page and mark it dirty
-	 * so we'll later zero out any blocks which _were_ allocated.
+	 * Error recovery is a bit difficult. We need to zero out blocks that
+	 * were newly allocated, and dirty them to ensure they get written out.
+	 * Buffers need to be attached to the page at this point, otherwise
+	 * the handling of potential IO errors during writeout would be hard
+	 * (could try doing synchronous writeout, but what if that fails too?)
 	 */
-	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-	SetPageUptodate(page);
-	set_page_dirty(page);
+	spin_lock(&page->mapping->private_lock);
+	bh = head;
+	block_start = 0;
+	do {
+		if (PageUptodate(page))
+			set_buffer_uptodate(bh);
+		if (PageDirty(page))
+			set_buffer_dirty(bh);
+
+		block_end = block_start+blocksize;
+		if (block_end <= from)
+			goto next;
+		if (block_start >= to)
+			goto next;
+
+		if (buffer_new(bh)) {
+			clear_buffer_new(bh);
+			if (!buffer_uptodate(bh)) {
+				zero_user_page(page, block_start, bh->b_size, KM_USER0);
+				set_buffer_uptodate(bh);
+			}
+			mark_buffer_dirty(bh);
+		}
+next:
+		block_start = block_end;
+		if (!bh->b_this_page)
+			bh->b_this_page = head;
+		bh = bh->b_this_page;
+	} while (bh != head);
+	attach_page_buffers(page, head);
+	spin_unlock(&page->mapping->private_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL(nobh_prepare_write);
@@ -2408,6 +2431,9 @@ int nobh_commit_write(struct file *file, struct page *page,
 	struct inode *inode = page->mapping->host;
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
+	if (page_has_buffers(page))
+		return generic_commit_write(file, page, from, to);
+
 	SetPageUptodate(page);
 	set_page_dirty(page);
 	if (pos > inode->i_size) {
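For context only (not part of this patch): a minimal sketch of how a filesystem of this kernel generation might wire the nobh helpers touched above into its address_space_operations. The myfs_* names and the stubbed myfs_get_block callback are hypothetical placeholders; with this patch, nobh_prepare_write() falls back to block_prepare_write() and nobh_commit_write() to generic_commit_write() when the page already has buffers attached, e.g. after the error path above attached them.

```c
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>

/* Hypothetical get_block_t callback; a real fs would map iblock and map_bh() here. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create)
{
	return -EIO;	/* stub */
}

static int myfs_prepare_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	/*
	 * With this patch, nobh_prepare_write() copes with a page that
	 * already has buffers (left by an earlier failed attempt) by
	 * calling block_prepare_write() instead.
	 */
	return nobh_prepare_write(page, from, to, myfs_get_block);
}

static const struct address_space_operations myfs_nobh_aops = {
	/* .readpage, .writepage, etc. omitted for brevity */
	.prepare_write	= myfs_prepare_write,
	/* nobh_commit_write() matches the ->commit_write prototype directly */
	.commit_write	= nobh_commit_write,
};
```

The design point the sketch illustrates: because the error path now attaches buffers to the page, a later write attempt through the same aops must detect page_has_buffers() and take the buffered path, which is exactly the fallback the patch adds at the top of both helpers.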